Load the diabetes data from CSV

diabetes_data = read.csv("diabetes_binary.csv")

Check the data frame structure and the list of variables

str(diabetes_data)
'data.frame':   70692 obs. of  22 variables:
 $ Diabetes_binary     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ HighBP              : num  1 1 0 1 0 0 0 0 0 0 ...
 $ HighChol            : num  0 1 0 1 0 0 1 0 0 0 ...
 $ CholCheck           : num  1 1 1 1 1 1 1 1 1 1 ...
 $ BMI                 : num  26 26 26 28 29 18 26 31 32 27 ...
 $ Smoker              : num  0 1 0 1 1 0 1 1 0 1 ...
 $ Stroke              : num  0 1 0 0 0 0 0 0 0 0 ...
 $ HeartDiseaseorAttack: num  0 0 0 0 0 0 0 0 0 0 ...
 $ PhysActivity        : num  1 0 1 1 1 1 1 0 1 0 ...
 $ Fruits              : num  0 1 1 1 1 1 1 1 1 1 ...
 $ Veggies             : num  1 0 1 1 1 1 1 1 1 1 ...
 $ HvyAlcoholConsump   : num  0 0 0 0 0 0 1 0 0 0 ...
 $ AnyHealthcare       : num  1 1 1 1 1 0 1 1 1 1 ...
 $ NoDocbcCost         : num  0 0 0 0 0 0 0 0 0 0 ...
 $ GenHlth             : num  3 3 1 3 2 2 1 4 3 3 ...
 $ MentHlth            : num  5 0 0 0 0 7 0 0 0 0 ...
 $ PhysHlth            : num  30 0 10 3 0 0 0 0 0 6 ...
 $ DiffWalk            : num  0 0 0 0 0 0 0 0 0 0 ...
 $ Sex                 : num  1 1 1 1 0 0 1 1 0 1 ...
 $ Age                 : num  4 12 13 11 8 1 13 6 3 6 ...
 $ Education           : num  6 6 6 6 5 4 5 4 6 4 ...
 $ Income              : num  8 8 8 8 8 7 6 3 8 4 ...

Convert categorical variables to factors

# Keep these ordinal/continuous columns numeric; convert everything else to factors
cols_skip <- c('BMI', 'GenHlth', 'MentHlth', 'PhysHlth', 'Age', 'Education', 'Income')
cols_skip_indices <- which(names(diabetes_data) %in% cols_skip)
diabetes_data[, -cols_skip_indices] <- lapply(diabetes_data[, -cols_skip_indices], factor)
str(diabetes_data)
'data.frame':   70692 obs. of  22 variables:
 $ Diabetes_binary     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ HighBP              : Factor w/ 2 levels "0","1": 2 2 1 2 1 1 1 1 1 1 ...
 $ HighChol            : Factor w/ 2 levels "0","1": 1 2 1 2 1 1 2 1 1 1 ...
 $ CholCheck           : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
 $ BMI                 : num  26 26 26 28 29 18 26 31 32 27 ...
 $ Smoker              : Factor w/ 2 levels "0","1": 1 2 1 2 2 1 2 2 1 2 ...
 $ Stroke              : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
 $ HeartDiseaseorAttack: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ PhysActivity        : Factor w/ 2 levels "0","1": 2 1 2 2 2 2 2 1 2 1 ...
 $ Fruits              : Factor w/ 2 levels "0","1": 1 2 2 2 2 2 2 2 2 2 ...
 $ Veggies             : Factor w/ 2 levels "0","1": 2 1 2 2 2 2 2 2 2 2 ...
 $ HvyAlcoholConsump   : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 1 1 1 ...
 $ AnyHealthcare       : Factor w/ 2 levels "0","1": 2 2 2 2 2 1 2 2 2 2 ...
 $ NoDocbcCost         : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ GenHlth             : num  3 3 1 3 2 2 1 4 3 3 ...
 $ MentHlth            : num  5 0 0 0 0 7 0 0 0 0 ...
 $ PhysHlth            : num  30 0 10 3 0 0 0 0 0 6 ...
 $ DiffWalk            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ Sex                 : Factor w/ 2 levels "0","1": 2 2 2 2 1 1 2 2 1 2 ...
 $ Age                 : num  4 12 13 11 8 1 13 6 3 6 ...
 $ Education           : num  6 6 6 6 5 4 5 4 6 4 ...
 $ Income              : num  8 8 8 8 8 7 6 3 8 4 ...

Check the diabetes dataset summary

summary(diabetes_data)
 Diabetes_binary HighBP    HighChol  CholCheck      BMI        Smoker    Stroke    HeartDiseaseorAttack
 0:35346         0:30860   0:33529   0: 1749   Min.   :12.00   0:37094   0:66297   0:60243             
 1:35346         1:39832   1:37163   1:68943   1st Qu.:25.00   1:33598   1: 4395   1:10449             
                                               Median :29.00                                           
                                               Mean   :29.86                                           
                                               3rd Qu.:33.00                                           
                                               Max.   :98.00                                           
 PhysActivity Fruits    Veggies   HvyAlcoholConsump AnyHealthcare NoDocbcCost    GenHlth     
 0:20993      0:27443   0:14932   0:67672           0: 3184       0:64053     Min.   :1.000  
 1:49699      1:43249   1:55760   1: 3020           1:67508       1: 6639     1st Qu.:2.000  
                                                                              Median :3.000  
                                                                              Mean   :2.837  
                                                                              3rd Qu.:4.000  
                                                                              Max.   :5.000  
    MentHlth         PhysHlth     DiffWalk  Sex            Age           Education         Income     
 Min.   : 0.000   Min.   : 0.00   0:52826   0:38386   Min.   : 1.000   Min.   :1.000   Min.   :1.000  
 1st Qu.: 0.000   1st Qu.: 0.00   1:17866   1:32306   1st Qu.: 7.000   1st Qu.:4.000   1st Qu.:4.000  
 Median : 0.000   Median : 0.00                       Median : 9.000   Median :5.000   Median :6.000  
 Mean   : 3.752   Mean   : 5.81                       Mean   : 8.584   Mean   :4.921   Mean   :5.698  
 3rd Qu.: 2.000   3rd Qu.: 6.00                       3rd Qu.:11.000   3rd Qu.:6.000   3rd Qu.:8.000  
 Max.   :30.000   Max.   :30.00                       Max.   :13.000   Max.   :6.000   Max.   :8.000  

Check whether the diabetes data is balanced or imbalanced

proportions(table(diabetes_data$Diabetes_binary))

  0   1 
0.5 0.5 
pie(proportions(table(diabetes_data$Diabetes_binary)), labels = c('Non-Diabetes', 'Diabetes'), col = c('green', 'red'))
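A bar plot is an alternative to the pie chart that makes the class counts easier to compare at a glance:

barplot(table(diabetes_data$Diabetes_binary), names.arg = c('Non-Diabetes', 'Diabetes'),
        col = c('green', 'red'), main = 'Class Balance', ylab = 'Count')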

From the output above, we can see that the dataset is balanced: observations are split evenly between the non-diabetic and diabetic classes (50% each).

Display the lists of categorical and numerical variables

numerical_variables <- character(0)
categorical_variables <- character(0)

# "col" avoids shadowing the base function c()
for (col in colnames(diabetes_data)) {
  if (is.numeric(diabetes_data[, col])) {
    numerical_variables <- c(numerical_variables, col)
  } else if (is.factor(diabetes_data[, col])) {
    categorical_variables <- c(categorical_variables, col)
  }
}
cat("Categorical variables:", categorical_variables, "\n","\n")
Categorical variables: Diabetes_binary HighBP HighChol CholCheck Smoker Stroke HeartDiseaseorAttack PhysActivity Fruits Veggies HvyAlcoholConsump AnyHealthcare NoDocbcCost DiffWalk Sex 
 
cat("Numerical variables:", numerical_variables, "\n")
Numerical variables: BMI GenHlth MentHlth PhysHlth Age Education Income 
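The same lists can be built without an explicit loop by applying the type predicates across all columns:

# vectorized equivalent of the loop above
names(diabetes_data)[sapply(diabetes_data, is.numeric)]
names(diabetes_data)[sapply(diabetes_data, is.factor)]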

Check whether there are any missing values in the dataset.

missing_counts <- colSums(is.na(diabetes_data))

cat("Missing Counts in each columns:", "\n")
Missing Counts in each columns: 
print(missing_counts)
     Diabetes_binary               HighBP             HighChol            CholCheck                  BMI 
                   0                    0                    0                    0                    0 
              Smoker               Stroke HeartDiseaseorAttack         PhysActivity               Fruits 
                   0                    0                    0                    0                    0 
             Veggies    HvyAlcoholConsump        AnyHealthcare          NoDocbcCost              GenHlth 
                   0                    0                    0                    0                    0 
            MentHlth             PhysHlth             DiffWalk                  Sex                  Age 
                   0                    0                    0                    0                    0 
           Education               Income 
                   0                    0 
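For a quick aggregate check, base R confirms the same result in one line:

anyNA(diabetes_data)          # TRUE if any cell is NA
sum(is.na(diabetes_data))     # total count of missing cells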

Use statistical tests and plots to examine the relationship between diabetes and each of the other variables: a chi-square test (with a mosaic plot) for categorical predictors and a one-way Welch test (with a box plot) for numerical ones. Variables whose p-value exceeds 0.05 are removed from the dataset as only weakly related to the diabetes outcome.

diabetes_indices <- which(names(diabetes_data) == 'Diabetes_binary')
for (col in colnames(diabetes_data[, -diabetes_indices])) {
  if (is.factor(diabetes_data[, col])) {
    try({
      test_result <- chisq.test(diabetes_data$Diabetes_binary, diabetes_data[, col])
      cat('pvalue of the chi-square test b/w', col, "and Diabetes is:", test_result$p.value, '\n')
      mosaicplot(diabetes_data$Diabetes_binary ~ diabetes_data[, col], shade = TRUE,
                 main = paste("Mosaic Plot of Diabetes vs", col),
                 xlab = "Diabetes", ylab = col, las = 1)
    })
  } else if (is.numeric(diabetes_data[, col])) {
    try({
      test_result <- oneway.test(diabetes_data[, col] ~ diabetes_data$Diabetes_binary)
      cat('pvalue of the oneway test b/w', col, "and Diabetes is:", test_result$p.value, '\n')
      boxplot(diabetes_data[, col] ~ diabetes_data$Diabetes_binary, col = '#69b3a2',
              xlab = "Diabetes", ylab = col,
              main = paste("Box Plot of Diabetes vs", col))
    })
  }
  if (test_result$p.value > 0.05) {
    diabetes_data[[col]] <- NULL   # drop weakly related variables
    cat('\n', 'Removing', col, "from dataset as its p-value is greater than 0.05:",
        test_result$p.value, '\n')
  }
}
pvalue of the chi-square test b/w HighBP and Diabetes is: 0 
pvalue of the chi-square test b/w HighChol and Diabetes is: 0 

pvalue of the chi-square test b/w CholCheck and Diabetes is: 2.379871e-206 

pvalue of the oneway test b/w BMI and Diabetes is: 0 

pvalue of the chi-square test b/w Smoker and Diabetes is: 1.221105e-115 

pvalue of the chi-square test b/w Stroke and Diabetes is: 1.290837e-243 

pvalue of the chi-square test b/w HeartDiseaseorAttack and Diabetes is: 0 

pvalue of the chi-square test b/w PhysActivity and Diabetes is: 0 

pvalue of the chi-square test b/w Fruits and Diabetes is: 7.967065e-47 

pvalue of the chi-square test b/w Veggies and Diabetes is: 1.40071e-98 

pvalue of the chi-square test b/w HvyAlcoholConsump and Diabetes is: 3.913396e-140 

pvalue of the chi-square test b/w AnyHealthcare and Diabetes is: 7.855834e-10 

pvalue of the chi-square test b/w NoDocbcCost and Diabetes is: 1.405326e-27 

pvalue of the oneway test b/w GenHlth and Diabetes is: 0 

pvalue of the oneway test b/w MentHlth and Diabetes is: 7.117624e-119 

pvalue of the oneway test b/w PhysHlth and Diabetes is: 0 

pvalue of the chi-square test b/w DiffWalk and Diabetes is: 0 

pvalue of the chi-square test b/w Sex and Diabetes is: 3.860396e-32 

pvalue of the oneway test b/w Age and Diabetes is: 0 

pvalue of the oneway test b/w Education and Diabetes is: 0 

pvalue of the oneway test b/w Income and Diabetes is: 0 
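With roughly 70,000 observations, almost any association will reach statistical significance, so the tiny p-values above say little about the strength of each relationship. An effect-size measure such as Cramér's V, computed as V = sqrt(chi^2 / (n * (min(r, c) - 1))), is a useful complement; a minimal sketch for one retained predictor:

# Cramér's V for HighBP vs Diabetes_binary (both 2-level, so the denominator term is 1)
tab <- table(diabetes_data$Diabetes_binary, diabetes_data$HighBP)
chi2 <- as.numeric(chisq.test(tab)$statistic)
n <- sum(tab)
k <- min(nrow(tab), ncol(tab))
cramers_v <- sqrt(chi2 / (n * (k - 1)))
cramers_v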

Partition the dataset into training and test sets using caret's createDataPartition() function.

library(caret)
Loading required package: ggplot2
Loading required package: lattice
set.seed(1)   # reproducible split, matching the seeding used for the models below
partition_indices = createDataPartition(diabetes_data$Diabetes_binary, p = 0.8, list = FALSE)

diabetes_train_data = diabetes_data[partition_indices, ]
diabetes_test_data = diabetes_data[-partition_indices, ]
true_labels = diabetes_test_data$Diabetes_binary
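Because createDataPartition() samples within each outcome class, the 50/50 balance should carry over to both splits; this is easy to verify:

proportions(table(diabetes_train_data$Diabetes_binary))
proportions(table(diabetes_test_data$Diabetes_binary))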

Train and test a kNN model using 5-fold cross-validation set up with caret's trainControl() function.

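The classes here are already balanced, so no resampling is applied below. For reference, on an imbalanced dataset caret can apply SMOTE oversampling within each cross-validation fold through the sampling option of trainControl(); a minimal sketch (it requires the DMwR package and is not used in this analysis):

# hypothetical: SMOTE applied inside each CV fold on imbalanced data
tr_smote <- trainControl(method = "cv", number = 5, sampling = "smote")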
library(caret)

# name the control object "tr" to avoid shadowing caret::trainControl
tr <- trainControl(method = "cv", number = 5)
knn_model <- train(Diabetes_binary ~ ., data = diabetes_train_data, method = "knn", trControl = tr)
knn_predictions <- predict(knn_model, newdata = diabetes_test_data)
knn_confusion_matrix_class0 <- confusionMatrix(knn_predictions, true_labels, mode='everything')
print(knn_confusion_matrix_class0)
Confusion Matrix and Statistics

          Reference
Prediction    0    1
         0 4862 1689
         1 2207 5380
                                         
               Accuracy : 0.7244         
                 95% CI : (0.717, 0.7318)
    No Information Rate : 0.5            
    P-Value [Acc > NIR] : < 2.2e-16      
                                         
                  Kappa : 0.4489         
                                         
 Mcnemar's Test P-Value : < 2.2e-16      
                                         
            Sensitivity : 0.6878         
            Specificity : 0.7611         
         Pos Pred Value : 0.7422         
         Neg Pred Value : 0.7091         
              Precision : 0.7422         
                 Recall : 0.6878         
                     F1 : 0.7140         
             Prevalence : 0.5000         
         Detection Rate : 0.3439         
   Detection Prevalence : 0.4634         
      Balanced Accuracy : 0.7244         
                                         
       'Positive' Class : 0              
                                         
knn_confusion_matrix_class1 <- confusionMatrix(knn_predictions, true_labels, positive = '1', mode='everything')
print(knn_confusion_matrix_class1)
Confusion Matrix and Statistics

          Reference
Prediction    0    1
         0 4862 1689
         1 2207 5380
                                         
               Accuracy : 0.7244         
                 95% CI : (0.717, 0.7318)
    No Information Rate : 0.5            
    P-Value [Acc > NIR] : < 2.2e-16      
                                         
                  Kappa : 0.4489         
                                         
 Mcnemar's Test P-Value : < 2.2e-16      
                                         
            Sensitivity : 0.7611         
            Specificity : 0.6878         
         Pos Pred Value : 0.7091         
         Neg Pred Value : 0.7422         
              Precision : 0.7091         
                 Recall : 0.7611         
                     F1 : 0.7342         
             Prevalence : 0.5000         
         Detection Rate : 0.3805         
   Detection Prevalence : 0.5366         
      Balanced Accuracy : 0.7244         
                                         
       'Positive' Class : 1              
                                         

Lasso Model

library(glmnet)
Loading required package: Matrix
Loaded glmnet 4.1-6
library(caret)
set.seed(1)
# "preProc" is not a trainControl() argument (pre-processing such as "nzv" is
# requested via train(..., preProcess = "nzv")); the printout below confirms
# "No pre-processing", so the ineffective argument is dropped
tr = trainControl(method = "cv", number = 5)
tg = expand.grid(alpha = 1, lambda = 10^seq(-4, -2, length =100))
lasso_model <- train(Diabetes_binary ~ ., data = diabetes_train_data, method = "glmnet", trControl = tr, tuneGrid = tg)
lasso_model
glmnet 

56554 samples
   21 predictor
    2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (5 fold) 
Summary of sample sizes: 45244, 45242, 45244, 45243, 45243 
Resampling results across tuning parameters:

  lambda        Accuracy   Kappa    
  0.0001000000  0.7471091  0.4942182
  0.0001047616  0.7471091  0.4942182
  0.0001097499  0.7471091  0.4942182
  0.0001149757  0.7471091  0.4942182
  0.0001204504  0.7471091  0.4942182
  0.0001261857  0.7471091  0.4942182
  0.0001321941  0.7471091  0.4942182
  0.0001384886  0.7471091  0.4942182
  0.0001450829  0.7471091  0.4942182
  0.0001519911  0.7471091  0.4942182
  0.0001592283  0.7471091  0.4942182
  0.0001668101  0.7471091  0.4942182
  0.0001747528  0.7471091  0.4942182
  0.0001830738  0.7471091  0.4942182
  0.0001917910  0.7471091  0.4942182
  0.0002009233  0.7471091  0.4942182
  0.0002104904  0.7471091  0.4942182
  0.0002205131  0.7471091  0.4942182
  0.0002310130  0.7471091  0.4942182
  0.0002420128  0.7471091  0.4942182
  0.0002535364  0.7471091  0.4942182
  0.0002656088  0.7471091  0.4942182
  0.0002782559  0.7471091  0.4942182
  0.0002915053  0.7471091  0.4942182
  0.0003053856  0.7471091  0.4942182
  0.0003199267  0.7471091  0.4942182
  0.0003351603  0.7471091  0.4942182
  0.0003511192  0.7471091  0.4942182
  0.0003678380  0.7471091  0.4942182
  0.0003853529  0.7471091  0.4942182
  0.0004037017  0.7471091  0.4942182
  0.0004229243  0.7471091  0.4942182
  0.0004430621  0.7471091  0.4942182
  0.0004641589  0.7471091  0.4942182
  0.0004862602  0.7471091  0.4942182
  0.0005094138  0.7471091  0.4942182
  0.0005336699  0.7471091  0.4942182
  0.0005590810  0.7471091  0.4942182
  0.0005857021  0.7471091  0.4942182
  0.0006135907  0.7471091  0.4942182
  0.0006428073  0.7471091  0.4942182
  0.0006734151  0.7471091  0.4942182
  0.0007054802  0.7470914  0.4941828
  0.0007390722  0.7471091  0.4942182
  0.0007742637  0.7470914  0.4941828
  0.0008111308  0.7470914  0.4941828
  0.0008497534  0.7470206  0.4940414
  0.0008902151  0.7469853  0.4939706
  0.0009326033  0.7469853  0.4939706
  0.0009770100  0.7470383  0.4940767
  0.0010235310  0.7471090  0.4942182
  0.0010722672  0.7471267  0.4942536
  0.0011233240  0.7471621  0.4943243
  0.0011768120  0.7471798  0.4943596
  0.0012328467  0.7471975  0.4943950
  0.0012915497  0.7471621  0.4943243
  0.0013530478  0.7471090  0.4942182
  0.0014174742  0.7470737  0.4941474
  0.0014849683  0.7470383  0.4940767
  0.0015556761  0.7471444  0.4942889
  0.0016297508  0.7471267  0.4942535
  0.0017073526  0.7470737  0.4941474
  0.0017886495  0.7471621  0.4943242
  0.0018738174  0.7470737  0.4941474
  0.0019630407  0.7470560  0.4941120
  0.0020565123  0.7470737  0.4941474
  0.0021544347  0.7471798  0.4943596
  0.0022570197  0.7471798  0.4943596
  0.0023644894  0.7471621  0.4943242
  0.0024770764  0.7471267  0.4942535
  0.0025950242  0.7470737  0.4941474
  0.0027185882  0.7470030  0.4940060
  0.0028480359  0.7469499  0.4938998
  0.0029836472  0.7470737  0.4941474
  0.0031257158  0.7471444  0.4942889
  0.0032745492  0.7472151  0.4944303
  0.0034304693  0.7473212  0.4946425
  0.0035938137  0.7473743  0.4947486
  0.0037649358  0.7471974  0.4943949
  0.0039442061  0.7472151  0.4944303
  0.0041320124  0.7471267  0.4942535
  0.0043287613  0.7470383  0.4940767
  0.0045348785  0.7469676  0.4939352
  0.0047508102  0.7469322  0.4938645
  0.0049770236  0.7468615  0.4937230
  0.0052140083  0.7469499  0.4938998
  0.0054622772  0.7469322  0.4938645
  0.0057223677  0.7468438  0.4936876
  0.0059948425  0.7467200  0.4934401
  0.0062802914  0.7465962  0.4931925
  0.0065793322  0.7465078  0.4930157
  0.0068926121  0.7464725  0.4929450
  0.0072208090  0.7465786  0.4931572
  0.0075646333  0.7465255  0.4930511
  0.0079248290  0.7465432  0.4930865
  0.0083021757  0.7462603  0.4925206
  0.0086974900  0.7462957  0.4925914
  0.0091116276  0.7459420  0.4918841
  0.0095454846  0.7459774  0.4919548
  0.0100000000  0.7460127  0.4920256

Tuning parameter 'alpha' was held constant at a value of 1
Accuracy was used to select the optimal model using the largest value.
The final values used for the model were alpha = 1 and lambda = 0.003593814.
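Since the lasso shrinks weak coefficients exactly to zero, it is worth checking which predictors survive at the selected lambda; coef() on the underlying glmnet fit does this (a standard accessor on caret's final model):

coef(lasso_model$finalModel, s = lasso_model$bestTune$lambda)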
lasso_predictions <- predict(lasso_model, newdata = diabetes_test_data, na.action = na.pass)
lasso_confusion_matrix_class0 <- confusionMatrix(lasso_predictions, true_labels, mode='everything')
print(lasso_confusion_matrix_class0)
Confusion Matrix and Statistics

          Reference
Prediction    0    1
         0 5176 1626
         1 1893 5443
                                          
               Accuracy : 0.7511          
                 95% CI : (0.7439, 0.7582)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.5022          
                                          
 Mcnemar's Test P-Value : 7.323e-06       
                                          
            Sensitivity : 0.7322          
            Specificity : 0.7700          
         Pos Pred Value : 0.7610          
         Neg Pred Value : 0.7420          
              Precision : 0.7610          
                 Recall : 0.7322          
                     F1 : 0.7463          
             Prevalence : 0.5000          
         Detection Rate : 0.3661          
   Detection Prevalence : 0.4811          
      Balanced Accuracy : 0.7511          
                                          
       'Positive' Class : 0               
                                          
cat("\n\n")
lasso_confusion_matrix_class1 <- confusionMatrix(lasso_predictions, true_labels, mode='everything', positive='1')
print(lasso_confusion_matrix_class1)
Confusion Matrix and Statistics

          Reference
Prediction    0    1
         0 5176 1626
         1 1893 5443
                                          
               Accuracy : 0.7511          
                 95% CI : (0.7439, 0.7582)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.5022          
                                          
 Mcnemar's Test P-Value : 7.323e-06       
                                          
            Sensitivity : 0.7700          
            Specificity : 0.7322          
         Pos Pred Value : 0.7420          
         Neg Pred Value : 0.7610          
              Precision : 0.7420          
                 Recall : 0.7700          
                     F1 : 0.7557          
             Prevalence : 0.5000          
         Detection Rate : 0.3850          
   Detection Prevalence : 0.5189          
      Balanced Accuracy : 0.7511          
                                          
       'Positive' Class : 1               
                                          
cat("\n\n")

Ridge Model

library(glmnet)
library(caret)

set.seed(1)
tr = trainControl(method = "cv", number = 5)   # "preProc" dropped, as above
tg = expand.grid(alpha = 0, lambda = 10^seq(-3, -1, length =100))
ridge_model <- train(Diabetes_binary ~ ., data = diabetes_train_data, method = "glmnet", trControl = tr, tuneGrid = tg)
ridge_model
glmnet 

56554 samples
   21 predictor
    2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (5 fold) 
Summary of sample sizes: 45244, 45242, 45244, 45243, 45243 
Resampling results across tuning parameters:

  lambda       Accuracy   Kappa    
  0.001000000  0.7463664  0.4927329
  0.001047616  0.7463664  0.4927329
  0.001097499  0.7463664  0.4927329
  0.001149757  0.7463664  0.4927329
  0.001204504  0.7463664  0.4927329
  0.001261857  0.7463664  0.4927329
  0.001321941  0.7463664  0.4927329
  0.001384886  0.7463664  0.4927329
  0.001450829  0.7463664  0.4927329
  0.001519911  0.7463664  0.4927329
  0.001592283  0.7463664  0.4927329
  0.001668101  0.7463664  0.4927329
  0.001747528  0.7463664  0.4927329
  0.001830738  0.7463664  0.4927329
  0.001917910  0.7463664  0.4927329
  0.002009233  0.7463664  0.4927329
  0.002104904  0.7463664  0.4927329
  0.002205131  0.7463664  0.4927329
  0.002310130  0.7463664  0.4927329
  0.002420128  0.7463664  0.4927329
  0.002535364  0.7463664  0.4927329
  0.002656088  0.7463664  0.4927329
  0.002782559  0.7463664  0.4927329
  0.002915053  0.7463664  0.4927329
  0.003053856  0.7463664  0.4927329
  0.003199267  0.7463664  0.4927329
  0.003351603  0.7463664  0.4927329
  0.003511192  0.7463664  0.4927329
  0.003678380  0.7463664  0.4927329
  0.003853529  0.7463664  0.4927329
  0.004037017  0.7463664  0.4927329
  0.004229243  0.7463664  0.4927329
  0.004430621  0.7463664  0.4927329
  0.004641589  0.7463664  0.4927329
  0.004862602  0.7463664  0.4927329
  0.005094138  0.7463664  0.4927329
  0.005336699  0.7463664  0.4927329
  0.005590810  0.7463664  0.4927329
  0.005857021  0.7463664  0.4927329
  0.006135907  0.7463664  0.4927329
  0.006428073  0.7463664  0.4927329
  0.006734151  0.7463664  0.4927329
  0.007054802  0.7463664  0.4927329
  0.007390722  0.7463664  0.4927329
  0.007742637  0.7463664  0.4927329
  0.008111308  0.7463664  0.4927329
  0.008497534  0.7463664  0.4927329
  0.008902151  0.7463664  0.4927329
  0.009326033  0.7463664  0.4927329
  0.009770100  0.7463664  0.4927329
  0.010235310  0.7463664  0.4927329
  0.010722672  0.7463664  0.4927329
  0.011233240  0.7463664  0.4927329
  0.011768120  0.7463664  0.4927329
  0.012328467  0.7463664  0.4927329
  0.012915497  0.7463664  0.4927329
  0.013530478  0.7463664  0.4927329
  0.014174742  0.7463664  0.4927329
  0.014849683  0.7463664  0.4927329
  0.015556761  0.7463664  0.4927329
  0.016297508  0.7463664  0.4927329
  0.017073526  0.7463664  0.4927329
  0.017886495  0.7463664  0.4927329
  0.018738174  0.7463664  0.4927329
  0.019630407  0.7463664  0.4927329
  0.020565123  0.7464018  0.4928037
  0.021544347  0.7464549  0.4929098
  0.022570197  0.7465963  0.4931927
  0.023644894  0.7465610  0.4931220
  0.024770764  0.7464725  0.4929451
  0.025950242  0.7465609  0.4931219
  0.027185882  0.7463664  0.4927329
  0.028480359  0.7461012  0.4922024
  0.029836472  0.7461719  0.4923439
  0.031257158  0.7461719  0.4923439
  0.032745492  0.7458713  0.4917427
  0.034304693  0.7457829  0.4915659
  0.035938137  0.7459067  0.4918135
  0.037649358  0.7458536  0.4917074
  0.039442061  0.7459951  0.4919903
  0.041320124  0.7458359  0.4916720
  0.043287613  0.7458006  0.4916013
  0.045348785  0.7457122  0.4914245
  0.047508102  0.7458006  0.4916013
  0.049770236  0.7459244  0.4918488
  0.052140083  0.7459244  0.4918489
  0.054622772  0.7456768  0.4913538
  0.057223677  0.7456061  0.4912123
  0.059948425  0.7456238  0.4912477
  0.062802914  0.7455177  0.4910355
  0.065793322  0.7455354  0.4910709
  0.068926121  0.7455000  0.4910001
  0.072208090  0.7455000  0.4910001
  0.075646333  0.7456591  0.4913184
  0.079248290  0.7454823  0.4909648
  0.083021757  0.7456061  0.4912123
  0.086974900  0.7455531  0.4911063
  0.091116276  0.7454646  0.4909294
  0.095454846  0.7456238  0.4912477
  0.100000000  0.7455707  0.4911416

Tuning parameter 'alpha' was held constant at a value of 0
Accuracy was used to select the optimal model using the largest value.
The final values used for the model were alpha = 0 and lambda = 0.0225702.
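caret's plot method for a train object charts the cross-validated accuracy against the regularization strength, which makes the flat-then-declining pattern in the table above easier to see:

plot(ridge_model)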
ridge_predictions <- predict(ridge_model, newdata = diabetes_test_data, na.action = na.pass)
ridge_confusion_matrix_class0 <- confusionMatrix(ridge_predictions, true_labels, mode='everything')
print(ridge_confusion_matrix_class0)
Confusion Matrix and Statistics

          Reference
Prediction    0    1
         0 5184 1646
         1 1885 5423
                                         
               Accuracy : 0.7502         
                 95% CI : (0.743, 0.7574)
    No Information Rate : 0.5            
    P-Value [Acc > NIR] : < 2.2e-16      
                                         
                  Kappa : 0.5005         
                                         
 Mcnemar's Test P-Value : 6.196e-05      
                                         
            Sensitivity : 0.7333         
            Specificity : 0.7672         
         Pos Pred Value : 0.7590         
         Neg Pred Value : 0.7421         
              Precision : 0.7590         
                 Recall : 0.7333         
                     F1 : 0.7460         
             Prevalence : 0.5000         
         Detection Rate : 0.3667         
   Detection Prevalence : 0.4831         
      Balanced Accuracy : 0.7502         
                                         
       'Positive' Class : 0              
                                         
cat("\n\n")
ridge_confusion_matrix_class1 <- confusionMatrix(ridge_predictions, true_labels, mode='everything', positive='1')
print(ridge_confusion_matrix_class1)
Confusion Matrix and Statistics

          Reference
Prediction    0    1
         0 5184 1646
         1 1885 5423
                                         
               Accuracy : 0.7502         
                 95% CI : (0.743, 0.7574)
    No Information Rate : 0.5            
    P-Value [Acc > NIR] : < 2.2e-16      
                                         
                  Kappa : 0.5005         
                                         
 Mcnemar's Test P-Value : 6.196e-05      
                                         
            Sensitivity : 0.7672         
            Specificity : 0.7333         
         Pos Pred Value : 0.7421         
         Neg Pred Value : 0.7590         
              Precision : 0.7421         
                 Recall : 0.7672         
                     F1 : 0.7544         
             Prevalence : 0.5000         
         Detection Rate : 0.3836         
   Detection Prevalence : 0.5169         
      Balanced Accuracy : 0.7502         
                                         
       'Positive' Class : 1              
                                         
cat("\n\n")

Elastic Net Model

library(glmnet)
library(caret)

set.seed(1)
tr = trainControl(method = "cv", number = 5)   # "preProc" dropped, as above
tg = expand.grid(alpha =seq(0, 1, length=10), lambda = 10^seq(-3, 1, length = 100))
enet_model <- train(Diabetes_binary ~ ., data = diabetes_train_data, method = "glmnet", trControl = tr, tuneGrid = tg)
enet_model
glmnet 

56554 samples
   21 predictor
    2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (5 fold) 
Summary of sample sizes: 45244, 45242, 45244, 45243, 45243 
Resampling results across tuning parameters:

  alpha      lambda        Accuracy   Kappa    
  0.0000000   0.001000000  0.7463664  0.4927329
  0.0000000   0.001097499  0.7463664  0.4927329
  0.0000000   0.001204504  0.7463664  0.4927329
  0.0000000   0.001321941  0.7463664  0.4927329
  0.0000000   0.001450829  0.7463664  0.4927329
  0.0000000   0.001592283  0.7463664  0.4927329
  0.0000000   0.001747528  0.7463664  0.4927329
  0.0000000   0.001917910  0.7463664  0.4927329
  0.0000000   0.002104904  0.7463664  0.4927329
  0.0000000   0.002310130  0.7463664  0.4927329
  0.0000000   0.002535364  0.7463664  0.4927329
  0.0000000   0.002782559  0.7463664  0.4927329
  0.0000000   0.003053856  0.7463664  0.4927329
  0.0000000   0.003351603  0.7463664  0.4927329
  0.0000000   0.003678380  0.7463664  0.4927329
  0.0000000   0.004037017  0.7463664  0.4927329
  0.0000000   0.004430621  0.7463664  0.4927329
  0.0000000   0.004862602  0.7463664  0.4927329
  0.0000000   0.005336699  0.7463664  0.4927329
  0.0000000   0.005857021  0.7463664  0.4927329
  0.0000000   0.006428073  0.7463664  0.4927329
  0.0000000   0.007054802  0.7463664  0.4927329
  0.0000000   0.007742637  0.7463664  0.4927329
  0.0000000   0.008497534  0.7463664  0.4927329
  0.0000000   0.009326033  0.7463664  0.4927329
  0.0000000   0.010235310  0.7463664  0.4927329
  0.0000000   0.011233240  0.7463664  0.4927329
  0.0000000   0.012328467  0.7463664  0.4927329
  0.0000000   0.013530478  0.7463664  0.4927329
  0.0000000   0.014849683  0.7463664  0.4927329
  0.0000000   0.016297508  0.7463664  0.4927329
  0.0000000   0.017886495  0.7463664  0.4927329
  0.0000000   0.019630407  0.7463664  0.4927329
  0.0000000   0.021544347  0.7464549  0.4929098
  0.0000000   0.023644894  0.7465610  0.4931220
  0.0000000   0.025950242  0.7465609  0.4931219
  0.0000000   0.028480359  0.7461012  0.4922024
  0.0000000   0.031257158  0.7461719  0.4923439
  0.0000000   0.034304693  0.7457829  0.4915659
  0.0000000   0.037649358  0.7458536  0.4917074
  0.0000000   0.041320124  0.7458359  0.4916720
  0.0000000   0.045348785  0.7457122  0.4914245
  0.0000000   0.049770236  0.7459244  0.4918488
  0.0000000   0.054622772  0.7456768  0.4913538
  0.0000000   0.059948425  0.7456238  0.4912477
  0.0000000   0.065793322  0.7455354  0.4910709
  0.0000000   0.072208090  0.7455000  0.4910001
  0.0000000   0.079248290  0.7454823  0.4909648
  0.0000000   0.086974900  0.7455531  0.4911063
  0.0000000   0.095454846  0.7456238  0.4912477
  0.0000000   0.104761575  0.7455884  0.4911770
  0.0000000   0.114975700  0.7454293  0.4908587
  0.0000000   0.126185688  0.7449872  0.4899746
  0.0000000   0.138488637  0.7447574  0.4895149
  0.0000000   0.151991108  0.7445275  0.4890552
  0.0000000   0.166810054  0.7441915  0.4883833
  0.0000000   0.183073828  0.7437849  0.4875699
  0.0000000   0.200923300  0.7432544  0.4865089
  0.0000000   0.220513074  0.7428300  0.4856602
  0.0000000   0.242012826  0.7427416  0.4854833
  0.0000000   0.265608778  0.7425648  0.4851297
  0.0000000   0.291505306  0.7424587  0.4849175
  0.0000000   0.319926714  0.7421404  0.4842810
  0.0000000   0.351119173  0.7416807  0.4833615
  0.0000000   0.385352859  0.7411679  0.4823360
  0.0000000   0.422924287  0.7406374  0.4812751
  0.0000000   0.464158883  0.7403545  0.4807092
  0.0000000   0.509413801  0.7398771  0.4797544
  0.0000000   0.559081018  0.7395411  0.4790825
  0.0000000   0.613590727  0.7387808  0.4775618
  0.0000000   0.673415066  0.7384095  0.4768192
  0.0000000   0.739072203  0.7378613  0.4757229
  0.0000000   0.811130831  0.7374900  0.4749803
  0.0000000   0.890215085  0.7369772  0.4739547
  0.0000000   0.977009957  0.7365528  0.4731059
  0.0000000   1.072267222  0.7360578  0.4721157
  0.0000000   1.176811952  0.7355803  0.4711609
  0.0000000   1.291549665  0.7351913  0.4703829
  0.0000000   1.417474163  0.7346785  0.4693573
  0.0000000   1.555676144  0.7338475  0.4676951
  0.0000000   1.707352647  0.7333877  0.4667757
  0.0000000   1.873817423  0.7330518  0.4661037
  0.0000000   2.056512308  0.7326628  0.4653257
  0.0000000   2.257019720  0.7323622  0.4647245
  0.0000000   2.477076356  0.7320793  0.4641587
  0.0000000   2.718588243  0.7318494  0.4636990
  0.0000000   2.983647240  0.7316372  0.4632746
  0.0000000   3.274549163  0.7314250  0.4628503
  0.0000000   3.593813664  0.7309123  0.4618247
  0.0000000   3.944206059  0.7306117  0.4612235
  0.0000000   4.328761281  0.7305763  0.4611528
  0.0000000   4.750810162  0.7302934  0.4605869
  0.0000000   5.214008288  0.7300458  0.4600918
  0.0000000   5.722367659  0.7300635  0.4601272
  0.0000000   6.280291442  0.7299928  0.4599857
  0.0000000   6.892612104  0.7297275  0.4594553
  0.0000000   7.564633276  0.7295154  0.4590309
  0.0000000   8.302175681  0.7291794  0.4583590
  0.0000000   9.111627561  0.7291617  0.4583236
  0.0000000  10.000000000  0.7291087  0.4582176
  0.1111111   0.001000000  0.7471798  0.4943597
  0.1111111   0.001097499  0.7471798  0.4943597
  0.1111111   0.001204504  0.7471798  0.4943597
  0.1111111   0.001321941  0.7471798  0.4943597
  0.1111111   0.001450829  0.7471798  0.4943597
  0.1111111   0.001592283  0.7471798  0.4943597
  0.1111111   0.001747528  0.7471798  0.4943597
  0.1111111   0.001917910  0.7471798  0.4943597
  0.1111111   0.002104904  0.7471798  0.4943597
  0.1111111   0.002310130  0.7471444  0.4942889
  0.1111111   0.002535364  0.7471267  0.4942535
  0.1111111   0.002782559  0.7470560  0.4941121
  0.1111111   0.003053856  0.7470737  0.4941475
  0.1111111   0.003351603  0.7470914  0.4941828
  0.1111111   0.003678380  0.7471091  0.4942182
  0.1111111   0.004037017  0.7470737  0.4941474
  0.1111111   0.004430621  0.7470914  0.4941828
  0.1111111   0.004862602  0.7469853  0.4939706
  0.1111111   0.005336699  0.7470560  0.4941121
  0.1111111   0.005857021  0.7468792  0.4937585
  0.1111111   0.006428073  0.7468438  0.4936877
  0.1111111   0.007054802  0.7467554  0.4935109
  0.1111111   0.007742637  0.7467908  0.4935817
  0.1111111   0.008497534  0.7468085  0.4936170
  0.1111111   0.009326033  0.7466847  0.4933694
  0.1111111   0.010235310  0.7467731  0.4935463
  0.1111111   0.011233240  0.7467731  0.4935463
  0.1111111   0.012328467  0.7467377  0.4934755
  0.1111111   0.013530478  0.7467554  0.4935109
  0.1111111   0.014849683  0.7467908  0.4935816
  0.1111111   0.016297508  0.7466140  0.4932280
  0.1111111   0.017886495  0.7467908  0.4935816
  0.1111111   0.019630407  0.7467378  0.4934756
  0.1111111   0.021544347  0.7467201  0.4934402
  0.1111111   0.023644894  0.7468792  0.4937585
  0.1111111   0.025950242  0.7467201  0.4934402
  0.1111111   0.028480359  0.7468615  0.4937231
  0.1111111   0.031257158  0.7467377  0.4934756
  0.1111111   0.034304693  0.7463487  0.4926975
  0.1111111   0.037649358  0.7462426  0.4924854
  0.1111111   0.041320124  0.7458713  0.4917427
  0.1111111   0.045348785  0.7460304  0.4920610
  0.1111111   0.049770236  0.7460658  0.4921317
  0.1111111   0.054622772  0.7461719  0.4923439
  0.1111111   0.059948425  0.7462426  0.4924854
  0.1111111   0.065793322  0.7459597  0.4919196
  0.1111111   0.072208090  0.7457122  0.4914245
  0.1111111   0.079248290  0.7455353  0.4910708
  0.1111111   0.086974900  0.7453055  0.4906111
  0.1111111   0.095454846  0.7453762  0.4907525
  0.1111111   0.104761575  0.7453054  0.4906110
  0.1111111   0.114975700  0.7450049  0.4900099
  0.1111111   0.126185688  0.7447396  0.4894794
  0.1111111   0.138488637  0.7447396  0.4894794
  0.1111111   0.151991108  0.7446689  0.4893379
  0.1111111   0.166810054  0.7447573  0.4895146
  0.1111111   0.183073828  0.7439439  0.4878879
  0.1111111   0.200923300  0.7434488  0.4868977
  0.1111111   0.220513074  0.7428299  0.4856599
  0.1111111   0.242012826  0.7425647  0.4851295
  0.1111111   0.265608778  0.7421403  0.4842807
  0.1111111   0.291505306  0.7418574  0.4837149
  0.1111111   0.319926714  0.7413446  0.4826893
  0.1111111   0.351119173  0.7410971  0.4821942
  0.1111111   0.385352859  0.7405312  0.4810626
  0.1111111   0.422924287  0.7401422  0.4802845
  0.1111111   0.464158883  0.7395764  0.4791529
  0.1111111   0.509413801  0.7392758  0.4785518
  0.1111111   0.559081018  0.7388691  0.4777384
  0.1111111   0.613590727  0.7386039  0.4772079
  0.1111111   0.673415066  0.7377374  0.4754749
  0.1111111   0.739072203  0.7364643  0.4729287
  0.1111111   0.811130831  0.7348552  0.4697106
  0.1111111   0.890215085  0.7320615  0.4641231
  0.1111111   0.977009957  0.7296213  0.4592429
  0.1111111   1.072267222  0.7277116  0.4554235
  0.1111111   1.176811952  0.7266507  0.4533018
  0.1111111   1.291549665  0.7100294  0.4200590
  0.1111111   1.417474163  0.7099410  0.4198822
  0.1111111   1.555676144  0.7099410  0.4198822
  0.1111111   1.707352647  0.7041055  0.4082111
  0.1111111   1.873817423  0.4999823  0.0000000
  0.1111111   2.056512308  0.4999823  0.0000000
  0.1111111   2.257019720  0.4999823  0.0000000
  0.1111111   2.477076356  0.4999823  0.0000000
  0.1111111   2.718588243  0.4999823  0.0000000
  0.1111111   2.983647240  0.4999823  0.0000000
  0.1111111   3.274549163  0.4999823  0.0000000
  0.1111111   3.593813664  0.4999823  0.0000000
  0.1111111   3.944206059  0.4999823  0.0000000
  0.1111111   4.328761281  0.4999823  0.0000000
  0.1111111   4.750810162  0.4999823  0.0000000
  0.1111111   5.214008288  0.4999823  0.0000000
  0.1111111   5.722367659  0.4999823  0.0000000
  0.1111111   6.280291442  0.4999823  0.0000000
  0.1111111   6.892612104  0.4999823  0.0000000
  0.1111111   7.564633276  0.4999823  0.0000000
  0.1111111   8.302175681  0.4999823  0.0000000
  0.1111111   9.111627561  0.4999823  0.0000000
  0.1111111  10.000000000  0.4999823  0.0000000
  0.2222222   0.001000000  0.7470737  0.4941475
  0.2222222   0.001097499  0.7470737  0.4941475
  0.2222222   0.001204504  0.7470737  0.4941475
  0.2222222   0.001321941  0.7470737  0.4941475
  0.2222222   0.001450829  0.7470737  0.4941475
  0.2222222   0.001592283  0.7470737  0.4941475
  0.2222222   0.001747528  0.7470737  0.4941475
  0.2222222   0.001917910  0.7470914  0.4941828
  0.2222222   0.002104904  0.7471091  0.4942182
  0.2222222   0.002310130  0.7471267  0.4942536
  0.2222222   0.002535364  0.7471267  0.4942535
  0.2222222   0.002782559  0.7470560  0.4941121
  0.2222222   0.003053856  0.7469853  0.4939706
  0.2222222   0.003351603  0.7470914  0.4941828
  0.2222222   0.003678380  0.7470383  0.4940767
  0.2222222   0.004037017  0.7470030  0.4940060
  0.2222222   0.004430621  0.7469853  0.4939706
  0.2222222   0.004862602  0.7470030  0.4940060
  0.2222222   0.005336699  0.7469676  0.4939352
  0.2222222   0.005857021  0.7470206  0.4940413
  0.2222222   0.006428073  0.7471090  0.4942182
  0.2222222   0.007054802  0.7470030  0.4940060
  0.2222222   0.007742637  0.7469499  0.4938999
  0.2222222   0.008497534  0.7469322  0.4938645
  0.2222222   0.009326033  0.7468615  0.4937230
  0.2222222   0.010235310  0.7470030  0.4940060
  0.2222222   0.011233240  0.7467731  0.4935462
  0.2222222   0.012328467  0.7467554  0.4935109
  0.2222222   0.013530478  0.7465609  0.4931219
  0.2222222   0.014849683  0.7464725  0.4929450
  0.2222222   0.016297508  0.7466139  0.4932279
  0.2222222   0.017886495  0.7465078  0.4930157
  0.2222222   0.019630407  0.7467554  0.4935109
  0.2222222   0.021544347  0.7468792  0.4937584
  0.2222222   0.023644894  0.7465255  0.4930511
  0.2222222   0.025950242  0.7463664  0.4927329
  0.2222222   0.028480359  0.7463487  0.4926975
  0.2222222   0.031257158  0.7462249  0.4924499
  0.2222222   0.034304693  0.7463134  0.4926268
  0.2222222   0.037649358  0.7461896  0.4923793
  0.2222222   0.041320124  0.7460128  0.4920256
  0.2222222   0.045348785  0.7458360  0.4916720
  0.2222222   0.049770236  0.7456768  0.4913538
  0.2222222   0.054622772  0.7455530  0.4911062
  0.2222222   0.059948425  0.7453762  0.4907526
  0.2222222   0.065793322  0.7451994  0.4903989
  0.2222222   0.072208090  0.7450225  0.4900452
  0.2222222   0.079248290  0.7444744  0.4889489
  0.2222222   0.086974900  0.7442798  0.4885598
  0.2222222   0.095454846  0.7442798  0.4885598
 [ reached getOption("max.print") -- omitted 750 rows ]

Accuracy was used to select the optimal model using the largest value.
The final values used for the model were alpha = 0.7777778 and lambda = 0.004430621.
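To skip the (truncated) grid printout entirely, the chosen tuning parameters and their resampled accuracy can be pulled directly with standard caret accessors:

enet_model$bestTune                 # alpha and lambda of the winning model
max(enet_model$results$Accuracy)    # its cross-validated accuracy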
enet_predictions <- predict(enet_model, newdata = diabetes_test_data, na.action = na.pass)
enet_confusion_matrix_class0 <- confusionMatrix(enet_predictions, true_labels, mode='everything')
print(enet_confusion_matrix_class0)
Confusion Matrix and Statistics

          Reference
Prediction    0    1
         0 5175 1626
         1 1894 5443
                                          
               Accuracy : 0.751           
                 95% CI : (0.7438, 0.7581)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.5021          
                                          
 Mcnemar's Test P-Value : 6.786e-06       
                                          
            Sensitivity : 0.7321          
            Specificity : 0.7700          
         Pos Pred Value : 0.7609          
         Neg Pred Value : 0.7419          
              Precision : 0.7609          
                 Recall : 0.7321          
                     F1 : 0.7462          
             Prevalence : 0.5000          
         Detection Rate : 0.3660          
   Detection Prevalence : 0.4810          
      Balanced Accuracy : 0.7510          
                                          
       'Positive' Class : 0               
                                          
cat("\n\n")
enet_confusion_matrix_class1 <- confusionMatrix(enet_predictions, true_labels, mode='everything', positive='1')
print(enet_confusion_matrix_class1)
Confusion Matrix and Statistics

          Reference
Prediction    0    1
         0 5175 1626
         1 1894 5443
                                          
               Accuracy : 0.751           
                 95% CI : (0.7438, 0.7581)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.5021          
                                          
 Mcnemar's Test P-Value : 6.786e-06       
                                          
            Sensitivity : 0.7700          
            Specificity : 0.7321          
         Pos Pred Value : 0.7419          
         Neg Pred Value : 0.7609          
              Precision : 0.7419          
                 Recall : 0.7700          
                     F1 : 0.7557          
             Prevalence : 0.5000          
         Detection Rate : 0.3850          
   Detection Prevalence : 0.5190          
      Balanced Accuracy : 0.7510          
                                          
       'Positive' Class : 1               
                                          
cat("\n\n")

Random Forest Model

library(caret)
set.seed(1)
tr = trainControl(method = "cv", number = 5)   # "preProc" dropped, as above
rf_model <- train(Diabetes_binary ~ ., data = diabetes_train_data, method = "rf", trControl = tr, importance = TRUE)
rf_model
Random Forest 

56554 samples
   21 predictor
    2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (5 fold) 
Summary of sample sizes: 45244, 45242, 45244, 45243, 45243 
Resampling results across tuning parameters:

  mtry  Accuracy   Kappa    
   2    0.7489127  0.4978257
  11    0.7335291  0.4670583
  21    0.7281536  0.4563074

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was mtry = 2.
varImp(rf_model)
rf variable importance

  only 20 most important variables shown (out of 21)
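The importance table itself is truncated above; plotting it is often clearer, since caret provides a plot method for varImp() output:

plot(varImp(rf_model), top = 10)   # ten most important predictors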
rf_predictions <- predict(rf_model, newdata = diabetes_test_data, na.action = na.pass)
rf_confusion_matrix_class0 <- confusionMatrix(rf_predictions, true_labels, mode='everything')
print(rf_confusion_matrix_class0)
Confusion Matrix and Statistics

          Reference
Prediction    0    1
         0 5241 1196
         1 1828 5873
                                          
               Accuracy : 0.7861          
                 95% CI : (0.7793, 0.7928)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.5722          
                                          
 Mcnemar's Test P-Value : < 2.2e-16       
                                          
            Sensitivity : 0.7414          
            Specificity : 0.8308          
         Pos Pred Value : 0.8142          
         Neg Pred Value : 0.7626          
              Precision : 0.8142          
                 Recall : 0.7414          
                     F1 : 0.7761          
             Prevalence : 0.5000          
         Detection Rate : 0.3707          
   Detection Prevalence : 0.4553          
      Balanced Accuracy : 0.7861          
                                          
       'Positive' Class : 0               
                                          
cat("\n\n")
rf_confusion_matrix_class1 <- confusionMatrix(rf_predictions, true_labels, mode='everything', positive='1')
print(rf_confusion_matrix_class1)
Confusion Matrix and Statistics

          Reference
Prediction    0    1
         0 4975 1436
         1 2094 5633
                                          
               Accuracy : 0.7503          
                 95% CI : (0.7431, 0.7574)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.5006          
                                          
 Mcnemar's Test P-Value : < 2.2e-16       
                                          
            Sensitivity : 0.7969          
            Specificity : 0.7038          
         Pos Pred Value : 0.7290          
         Neg Pred Value : 0.7760          
              Precision : 0.7290          
                 Recall : 0.7969          
                     F1 : 0.7614          
             Prevalence : 0.5000          
         Detection Rate : 0.3984          
   Detection Prevalence : 0.5465          
      Balanced Accuracy : 0.7503          
                                          
       'Positive' Class : 1               
                                          
cat("\n\n")

GBM Model (without sampling)

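Note that method = "gbm" prints a per-iteration deviance log for every resample (seen below). It can be silenced by passing verbose = FALSE through train(), which forwards it to gbm():

# quieter variant of the call below (output omitted here)
# gbm_model <- train(Diabetes_binary ~ ., data = diabetes_train_data,
#                    method = "gbm", trControl = tr, verbose = FALSE)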
library(caret)
set.seed(1)
tr = trainControl(method = "cv", number = 5)   # "preProc" dropped, as above
gbm_model <- train(Diabetes_binary ~ ., data = diabetes_train_data, method = "gbm", trControl = tr)
Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.3588             nan     0.1000    0.0139
     2        1.3337             nan     0.1000    0.0127
     3        1.3131             nan     0.1000    0.0103
     4        1.2927             nan     0.1000    0.0101
     5        1.2761             nan     0.1000    0.0082
     6        1.2610             nan     0.1000    0.0075
     7        1.2481             nan     0.1000    0.0063
     8        1.2360             nan     0.1000    0.0059
     9        1.2262             nan     0.1000    0.0049
    10        1.2168             nan     0.1000    0.0045
    20        1.1473             nan     0.1000    0.0024
    40        1.0818             nan     0.1000    0.0011
    60        1.0533             nan     0.1000    0.0005
    80        1.0377             nan     0.1000    0.0003
   100        1.0279             nan     0.1000    0.0002
   120        1.0218             nan     0.1000    0.0001
   140        1.0178             nan     0.1000    0.0001
   150        1.0164             nan     0.1000    0.0001

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.3499             nan     0.1000    0.0181
     2        1.3198             nan     0.1000    0.0152
     3        1.2949             nan     0.1000    0.0124
     4        1.2729             nan     0.1000    0.0110
     5        1.2546             nan     0.1000    0.0091
     6        1.2379             nan     0.1000    0.0082
     7        1.2223             nan     0.1000    0.0075
     8        1.2090             nan     0.1000    0.0066
     9        1.1967             nan     0.1000    0.0060
    10        1.1863             nan     0.1000    0.0051
    20        1.1059             nan     0.1000    0.0027
    40        1.0462             nan     0.1000    0.0007
    60        1.0242             nan     0.1000    0.0003
    80        1.0141             nan     0.1000    0.0002
   100        1.0090             nan     0.1000    0.0001
   120        1.0062             nan     0.1000    0.0001
   140        1.0043             nan     0.1000    0.0000
   150        1.0035             nan     0.1000    0.0000

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.3434             nan     0.1000    0.0213
     2        1.3091             nan     0.1000    0.0173
     3        1.2808             nan     0.1000    0.0141
     4        1.2581             nan     0.1000    0.0116
     5        1.2378             nan     0.1000    0.0101
     6        1.2202             nan     0.1000    0.0086
     7        1.2043             nan     0.1000    0.0081
     8        1.1882             nan     0.1000    0.0077
     9        1.1753             nan     0.1000    0.0063
    10        1.1634             nan     0.1000    0.0057
    20        1.0860             nan     0.1000    0.0022
    40        1.0306             nan     0.1000    0.0008
    60        1.0127             nan     0.1000    0.0002
    80        1.0059             nan     0.1000    0.0001
   100        1.0024             nan     0.1000    0.0001
   120        1.0004             nan     0.1000    0.0000
   140        0.9986             nan     0.1000    0.0000
   150        0.9980             nan     0.1000   -0.0000

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.3590             nan     0.1000    0.0137
     2        1.3335             nan     0.1000    0.0127
     3        1.3126             nan     0.1000    0.0104
     4        1.2930             nan     0.1000    0.0098
     5        1.2767             nan     0.1000    0.0080
     6        1.2618             nan     0.1000    0.0076
     7        1.2494             nan     0.1000    0.0062
     8        1.2376             nan     0.1000    0.0059
     9        1.2276             nan     0.1000    0.0049
    10        1.2180             nan     0.1000    0.0046
    20        1.1483             nan     0.1000    0.0024
    40        1.0822             nan     0.1000    0.0010
    60        1.0534             nan     0.1000    0.0005
    80        1.0378             nan     0.1000    0.0003
   100        1.0286             nan     0.1000    0.0002
   120        1.0223             nan     0.1000    0.0001
   140        1.0182             nan     0.1000    0.0001
   150        1.0169             nan     0.1000    0.0000

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.3503             nan     0.1000    0.0180
     2        1.3203             nan     0.1000    0.0150
     3        1.2944             nan     0.1000    0.0127
     4        1.2728             nan     0.1000    0.0105
     5        1.2539             nan     0.1000    0.0093
     6        1.2381             nan     0.1000    0.0077
     7        1.2235             nan     0.1000    0.0071
     8        1.2105             nan     0.1000    0.0063
     9        1.1988             nan     0.1000    0.0058
    10        1.1877             nan     0.1000    0.0056
    20        1.1077             nan     0.1000    0.0026
    40        1.0454             nan     0.1000    0.0006
    60        1.0235             nan     0.1000    0.0003
    80        1.0133             nan     0.1000    0.0002
   100        1.0084             nan     0.1000    0.0001
   120        1.0055             nan     0.1000    0.0000
   140        1.0035             nan     0.1000    0.0000
   150        1.0027             nan     0.1000   -0.0000

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.3448             nan     0.1000    0.0211
     2        1.3105             nan     0.1000    0.0173
     3        1.2822             nan     0.1000    0.0140
     4        1.2593             nan     0.1000    0.0113
     5        1.2386             nan     0.1000    0.0103
     6        1.2204             nan     0.1000    0.0090
     7        1.2046             nan     0.1000    0.0077
     8        1.1909             nan     0.1000    0.0067
     9        1.1787             nan     0.1000    0.0060
    10        1.1661             nan     0.1000    0.0061
    20        1.0843             nan     0.1000    0.0028
    40        1.0304             nan     0.1000    0.0007
    60        1.0131             nan     0.1000    0.0003
    80        1.0058             nan     0.1000    0.0001
   100        1.0027             nan     0.1000   -0.0000
   120        1.0004             nan     0.1000    0.0000
   140        0.9987             nan     0.1000   -0.0000
   150        0.9981             nan     0.1000   -0.0000

(Analogous Iter/TrainDeviance training logs for the remaining cross-validation folds and tuning settings omitted.)
gbm_model
Stochastic Gradient Boosting 

56554 samples
   21 predictor
    2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (5 fold) 
Summary of sample sizes: 45244, 45242, 45244, 45243, 45243 
Resampling results across tuning parameters:

  interaction.depth  n.trees  Accuracy   Kappa    
  1                   50      0.7416098  0.4832197
  1                  100      0.7476218  0.4952437
  1                  150      0.7493016  0.4986033
  2                   50      0.7474626  0.4949254
  2                  100      0.7506100  0.5012201
  2                  150      0.7511582  0.5023165
  3                   50      0.7493369  0.4986740
  3                  100      0.7516710  0.5033421
  3                  150      0.7522368  0.5044738

Tuning parameter 'shrinkage' was held constant at a value of 0.1
Tuning parameter 'n.minobsinnode' was held constant
 at a value of 10
Accuracy was used to select the optimal model using the largest value.
The final values used for the model were n.trees = 150, interaction.depth = 3, shrinkage = 0.1 and n.minobsinnode = 10.
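caret keeps the full tuning profile on the returned train object. An optional way to inspect it (not part of the original run):

plot(gbm_model)        # accuracy vs. boosting iterations, by interaction depth
gbm_model$bestTune     # the winning row of the tuning grid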
gbm_predictions <- predict(gbm_model, newdata = diabetes_test_data, na.action = na.pass)
gbm_confusion_matrix_class0 <- confusionMatrix(gbm_predictions, true_labels, mode='everything')
print(gbm_confusion_matrix_class0)
Confusion Matrix and Statistics

          Reference
Prediction    0    1
         0 5026 1512
         1 2043 5557
                                          
               Accuracy : 0.7486          
                 95% CI : (0.7413, 0.7557)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.4971          
                                          
 Mcnemar's Test P-Value : < 2.2e-16       
                                          
            Sensitivity : 0.7110          
            Specificity : 0.7861          
         Pos Pred Value : 0.7687          
         Neg Pred Value : 0.7312          
              Precision : 0.7687          
                 Recall : 0.7110          
                     F1 : 0.7387          
             Prevalence : 0.5000          
         Detection Rate : 0.3555          
   Detection Prevalence : 0.4624          
      Balanced Accuracy : 0.7486          
                                          
       'Positive' Class : 0               
                                          
cat("\n\n")
gbm_confusion_matrix_class1 <- confusionMatrix(gbm_predictions, true_labels, mode='everything', positive='1')
print(gbm_confusion_matrix_class1)
Confusion Matrix and Statistics

          Reference
Prediction    0    1
         0 5057 1478
         1 2012 5591
                                         
               Accuracy : 0.7531         
                 95% CI : (0.746, 0.7602)
    No Information Rate : 0.5            
    P-Value [Acc > NIR] : < 2.2e-16      
                                         
                  Kappa : 0.5063         
                                         
 Mcnemar's Test P-Value : < 2.2e-16      
                                         
            Sensitivity : 0.7909         
            Specificity : 0.7154         
         Pos Pred Value : 0.7354         
         Neg Pred Value : 0.7738         
              Precision : 0.7354         
                 Recall : 0.7909         
                     F1 : 0.7621         
             Prevalence : 0.5000         
         Detection Rate : 0.3955         
   Detection Prevalence : 0.5378         
      Balanced Accuracy : 0.7531         
                                         
       'Positive' Class : 1              
                                         
cat("\n\n")

SVM Linear Model

library(caret)
set.seed(1)
# Note: trainControl() has no preProc argument (the resampling summary below
# reports "No pre-processing"); preprocessing is requested via train(preProcess = ...)
tr <- trainControl(method = "cv", number = 5)
svm_linear_model <- train(Diabetes_binary ~ ., data = diabetes_train_data, method = "svmLinear", trControl = tr)
svm_linear_model
Support Vector Machines with Linear Kernel 

56554 samples
   21 predictor
    2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (5 fold) 
Summary of sample sizes: 45244, 45242, 45244, 45243, 45243 
Resampling results:

  Accuracy   Kappa    
  0.7472151  0.4944303

Tuning parameter 'C' was held constant at a value of 1
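Since C stayed at its default of 1, a natural follow-up would be to search a small cost grid; a sketch with illustrative values (not run here):

svm_c_grid <- expand.grid(C = c(0.25, 0.5, 1, 2, 4))
svm_linear_tuned <- train(Diabetes_binary ~ ., data = diabetes_train_data,
                          method = "svmLinear", trControl = tr,
                          tuneGrid = svm_c_grid)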
svm_linear_predictions <- predict(svm_linear_model, newdata = diabetes_test_data, na.action = na.pass)
svm_linear_confusion_matrix_class0 <- confusionMatrix(svm_linear_predictions, true_labels, mode='everything')
print(svm_linear_confusion_matrix_class0)
Confusion Matrix and Statistics

          Reference
Prediction    0    1
         0 4935 1474
         1 2134 5595
                                         
               Accuracy : 0.7448         
                 95% CI : (0.7375, 0.752)
    No Information Rate : 0.5            
    P-Value [Acc > NIR] : < 2.2e-16      
                                         
                  Kappa : 0.4896         
                                         
 Mcnemar's Test P-Value : < 2.2e-16      
                                         
            Sensitivity : 0.6981         
            Specificity : 0.7915         
         Pos Pred Value : 0.7700         
         Neg Pred Value : 0.7239         
              Precision : 0.7700         
                 Recall : 0.6981         
                     F1 : 0.7323         
             Prevalence : 0.5000         
         Detection Rate : 0.3491         
   Detection Prevalence : 0.4533         
      Balanced Accuracy : 0.7448         
                                         
       'Positive' Class : 0              
                                         
cat("\n\n")
svm_linear_confusion_matrix_class1 <- confusionMatrix(svm_linear_predictions, true_labels, mode='everything', positive='1')
print(svm_linear_confusion_matrix_class1)
Confusion Matrix and Statistics

          Reference
Prediction    0    1
         0 5025 1472
         1 2044 5597
                                          
               Accuracy : 0.7513          
                 95% CI : (0.7441, 0.7584)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.5026          
                                          
 Mcnemar's Test P-Value : < 2.2e-16       
                                          
            Sensitivity : 0.7918          
            Specificity : 0.7109          
         Pos Pred Value : 0.7325          
         Neg Pred Value : 0.7734          
              Precision : 0.7325          
                 Recall : 0.7918          
                     F1 : 0.7610          
             Prevalence : 0.5000          
         Detection Rate : 0.3959          
   Detection Prevalence : 0.5405          
      Balanced Accuracy : 0.7513          
                                          
       'Positive' Class : 1               
                                          
cat("\n\n")

SVM Radial Model

library(caret)
set.seed(1)
# As above, trainControl() has no preProc argument; only cv settings take effect here
tr <- trainControl(method = "cv", number = 5)
svm_radial_model <- train(Diabetes_binary ~ ., data = diabetes_train_data, method = "svmRadial", trControl = tr)
svm_radial_model
Support Vector Machines with Radial Basis Function Kernel 

56554 samples
   21 predictor
    2 classes: '0', '1' 

No pre-processing
Resampling: Cross-Validated (5 fold) 
Summary of sample sizes: 45244, 45242, 45244, 45243, 45243 
Resampling results across tuning parameters:

  C     Accuracy   Kappa    
  0.25  0.7512644  0.5025290
  0.50  0.7515120  0.5030241
  1.00  0.7508047  0.5016095

Tuning parameter 'sigma' was held constant at a value of 0.03579394
Accuracy was used to select the optimal model using the largest value.
The final values used for the model were sigma = 0.03579394 and C = 0.5.
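By default caret tunes only C for svmRadial and holds sigma at a single value estimated from the training data (via kernlab's sigest). To search both parameters, an explicit grid can be supplied; a sketch with illustrative values:

svm_rbf_grid <- expand.grid(sigma = c(0.01, 0.0358, 0.1),
                            C = c(0.25, 0.5, 1, 2))
svm_radial_tuned <- train(Diabetes_binary ~ ., data = diabetes_train_data,
                          method = "svmRadial", trControl = tr,
                          tuneGrid = svm_rbf_grid)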
svm_radial_predictions <- predict(svm_radial_model, newdata = diabetes_test_data, na.action = na.pass)
svm_radial_confusion_matrix_class0 <- confusionMatrix(svm_radial_predictions, true_labels, mode='everything')
print(svm_radial_confusion_matrix_class0)
Confusion Matrix and Statistics

          Reference
Prediction    0    1
         0 4867 1250
         1 2202 5819
                                          
               Accuracy : 0.7558          
                 95% CI : (0.7487, 0.7629)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.5117          
                                          
 Mcnemar's Test P-Value : < 2.2e-16       
                                          
            Sensitivity : 0.6885          
            Specificity : 0.8232          
         Pos Pred Value : 0.7957          
         Neg Pred Value : 0.7255          
              Precision : 0.7957          
                 Recall : 0.6885          
                     F1 : 0.7382          
             Prevalence : 0.5000          
         Detection Rate : 0.3442          
   Detection Prevalence : 0.4327          
      Balanced Accuracy : 0.7558          
                                          
       'Positive' Class : 0               
                                          
cat("\n\n")
svm_radial_confusion_matrix_class1 <- confusionMatrix(svm_radial_predictions, true_labels, mode='everything', positive='1')
print(svm_radial_confusion_matrix_class1)
Confusion Matrix and Statistics

          Reference
Prediction    0    1
         0 4913 1356
         1 2156 5713
                                          
               Accuracy : 0.7516          
                 95% CI : (0.7444, 0.7587)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.5032          
                                          
 Mcnemar's Test P-Value : < 2.2e-16       
                                          
            Sensitivity : 0.8082          
            Specificity : 0.6950          
         Pos Pred Value : 0.7260          
         Neg Pred Value : 0.7837          
              Precision : 0.7260          
                 Recall : 0.8082          
                     F1 : 0.7649          
             Prevalence : 0.5000          
         Detection Rate : 0.4041          
   Detection Prevalence : 0.5566          
      Balanced Accuracy : 0.7516          
                                          
       'Positive' Class : 1               
                                          
cat("\n\n")

Compare all the resampled models

compare <- resamples(list(KNN = knn_model, Lasso = lasso_model, Ridge = ridge_model, Enet = enet_model, RF = rf_model, GBM = gbm_model, SVML = svm_linear_model, SVMR = svm_radial_model))
summary(compare)

Call:
summary.resamples(object = compare)

Models: KNN, Lasso, Ridge, Enet, RF, GBM, SVML, SVMR 
Number of resamples: 5 

Accuracy 
           Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
KNN   0.7167359 0.7174182 0.7217753 0.7202850 0.7221289 0.7233666    0
Lasso 0.7454031 0.7464415 0.7481874 0.7473743 0.7483865 0.7484527    0
Ridge 0.7435467 0.7441429 0.7470604 0.7465963 0.7478338 0.7503979    0
Enet  0.7453147 0.7463531 0.7481213 0.7473743 0.7485411 0.7485411    0
RF    0.7440771 0.7488286 0.7488948 0.7489127 0.7490938 0.7536693    0
GBM   0.7511272 0.7512821 0.7515693 0.7522368 0.7518564 0.7553492    0
SVML  0.7449611 0.7459995 0.7477677 0.7472151 0.7478338 0.7495137    0
SVMR  0.7477016 0.7503315 0.7503979 0.7515120 0.7519229 0.7572060    0

Kappa 
           Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
KNN   0.4334757 0.4348364 0.4435539 0.4405701 0.4442544 0.4467301    0
Lasso 0.4908062 0.4928815 0.4963749 0.4947486 0.4967749 0.4969054    0
Ridge 0.4870934 0.4882843 0.4941224 0.4931927 0.4956676 0.5007958    0
Enet  0.4906294 0.4927047 0.4962444 0.4947486 0.4970822 0.4970822    0
RF    0.4881542 0.4976541 0.4977896 0.4978257 0.4981918 0.5073386    0
GBM   0.5022579 0.5025641 0.5031357 0.5044738 0.5037129 0.5106985    0
SVML  0.4899222 0.4920027 0.4955316 0.4944303 0.4956676 0.4990274    0
SVMR  0.4954031 0.5006585 0.5007958 0.5030241 0.5038513 0.5144120    0
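The resamples object also supports lattice plots and paired difference tests, which make the small GBM/SVMR edge easier to judge; an optional sketch:

bwplot(compare, metric = "Accuracy")   # fold-level accuracy distributions
dotplot(compare, metric = "Kappa")

model_diffs <- diff(compare)           # paired differences across folds
summary(model_diffs)                   # p-values for pairwise comparisons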

Preprocessing for neural network

library(caret)

# Hold out 10% of the training rows as a validation split for the network
parti_indices <- createDataPartition(diabetes_train_data$Diabetes_binary, p = 0.9, list = FALSE)
diabetes_index <- which(names(diabetes_train_data) == 'Diabetes_binary')

diabetes_train_data1 = diabetes_train_data[parti_indices, -diabetes_index]
diabetes_train_data1
training_labels = diabetes_train_data[parti_indices, diabetes_index]
training_labels <- as.numeric(training_labels) - 1
training_labels
   [1] 0 0 0 0 0 0 0 0 0 0 ...
 [ reached getOption("max.print") -- omitted 49900 entries ]
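The as.numeric(...) - 1 step works because the factor levels are exactly '0' and '1': as.numeric() on a factor returns the level indices 1 and 2. A level-agnostic alternative, shown only as a sketch:

training_labels <- as.numeric(as.character(training_labels))   # convert via the printed labels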
diabetes_validation_data = diabetes_train_data[-parti_indices, -diabetes_index]
diabetes_validation_data
validation_labels = diabetes_train_data[-parti_indices, diabetes_index]
validation_labels <- as.numeric(validation_labels) - 1
validation_labels
   [1] 0 0 0 0 0 0 0 0 0 0 ...
 [ reached getOption("max.print") -- omitted 4654 entries ]
diabetes_testing_data = diabetes_test_data[, -diabetes_index]
diabetes_testing_data
test_labels = as.numeric(diabetes_test_data[,diabetes_index]) - 1
test_labels
   [1] 0 0 0 0 0 0 0 0 0 0 ...
 [ reached getOption("max.print") -- omitted 13138 entries ]
library(caret)

str(diabetes_train_data1)
'data.frame':   50900 obs. of  21 variables:
 $ HighBP              : Factor w/ 2 levels "0","1": 2 1 2 1 1 1 1 2 2 2 ...
 $ HighChol            : Factor w/ 2 levels "0","1": 1 1 2 1 2 1 1 2 2 1 ...
 $ CholCheck           : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
 $ BMI                 : num  26 26 28 29 26 32 27 24 27 58 ...
 $ Smoker              : Factor w/ 2 levels "0","1": 1 1 2 2 2 1 2 2 1 1 ...
 $ Stroke              : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ HeartDiseaseorAttack: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 2 1 1 ...
 $ PhysActivity        : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 1 2 2 1 ...
 $ Fruits              : Factor w/ 2 levels "0","1": 1 2 2 2 2 2 2 2 2 2 ...
 $ Veggies             : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
 $ HvyAlcoholConsump   : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 1 1 1 ...
 $ AnyHealthcare       : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
 $ NoDocbcCost         : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ GenHlth             : num  3 1 3 2 1 3 3 3 2 3 ...
 $ MentHlth            : num  5 0 0 0 0 0 0 0 0 3 ...
 $ PhysHlth            : num  30 10 3 0 0 0 6 4 0 3 ...
 $ DiffWalk            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ Sex                 : Factor w/ 2 levels "0","1": 2 2 2 1 2 1 2 1 2 2 ...
 $ Age                 : num  4 13 11 8 13 3 6 12 7 10 ...
 $ Education           : num  6 6 6 5 5 6 4 4 6 4 ...
 $ Income              : num  8 8 8 8 6 8 4 6 8 6 ...
preproc <- preProcess(diabetes_train_data1, method = c("center", "scale"))

train_imputed <- predict(preproc, diabetes_train_data1)

str(train_imputed)
'data.frame':   50900 obs. of  21 variables:
 $ HighBP              : Factor w/ 2 levels "0","1": 2 1 2 1 1 1 1 2 2 2 ...
 $ HighChol            : Factor w/ 2 levels "0","1": 1 1 2 1 2 1 1 2 2 1 ...
 $ CholCheck           : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
 $ BMI                 : num  -0.542 -0.542 -0.262 -0.121 -0.542 ...
 $ Smoker              : Factor w/ 2 levels "0","1": 1 1 2 2 2 1 2 2 1 1 ...
 $ Stroke              : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ HeartDiseaseorAttack: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 2 1 1 ...
 $ PhysActivity        : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 1 2 2 1 ...
 $ Fruits              : Factor w/ 2 levels "0","1": 1 2 2 2 2 2 2 2 2 2 ...
 $ Veggies             : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
 $ HvyAlcoholConsump   : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 1 1 1 ...
 $ AnyHealthcare       : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
 $ NoDocbcCost         : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ GenHlth             : num  0.145 -1.649 0.145 -0.752 -1.649 ...
 $ MentHlth            : num  0.154 -0.46 -0.46 -0.46 -0.46 ...
 $ PhysHlth            : num  2.412 0.419 -0.278 -0.577 -0.577 ...
 $ DiffWalk            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ Sex                 : Factor w/ 2 levels "0","1": 2 2 2 1 2 1 2 1 2 2 ...
 $ Age                 : num  -1.6 1.55 0.85 -0.2 1.55 ...
 $ Education           : num  1.049 1.049 1.049 0.0775 0.0775 ...
 $ Income              : num  1.061 1.061 1.061 1.061 0.142 ...
train_imputed
test_imputed <- predict(preproc, diabetes_testing_data)
test_imputed
val_imputed <- predict(preproc, diabetes_validation_data)
val_imputed
library(data.table)
library(mltools)

train_encoded <- one_hot(as.data.table(train_imputed), dropUnusedLevels = FALSE)
train_encoded
test_encoded <- one_hot(as.data.table(test_imputed), dropUnusedLevels = FALSE)
test_encoded
val_encoded <- one_hot(as.data.table(val_imputed), dropUnusedLevels = FALSE)
val_encoded
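Because dropUnusedLevels = FALSE, the three encoded tables should share an identical set of dummy columns; a quick sanity check along these lines (not part of the original run):

stopifnot(identical(names(train_encoded), names(val_encoded)),
          identical(names(train_encoded), names(test_encoded)))
dim(train_encoded)   # rows x encoded feature count fed to the network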
# nearZeroVar() returns column indices by default; with saveMetrics = TRUE it
# returns a metrics data.frame, which cannot be used to subset columns
nzv_indices <- nearZeroVar(train_encoded)

# data.table subsetting by column index needs with = FALSE
train_encoded_nzv <- train_encoded[, -nzv_indices, with = FALSE]
val_encoded_nzv <- val_encoded[, -nzv_indices, with = FALSE]
test_encoded_nzv <- test_encoded[, -nzv_indices, with = FALSE]
train_encoded
val_encoded
test_encoded
library(keras)

# Feed-forward network: two ReLU hidden layers with dropout and a
# sigmoid output unit for the binary target
model <- keras_model_sequential() %>%
  layer_dense(units = 32, activation = "relu", input_shape = dim(train_encoded)[2]) %>%
  layer_dropout(rate = 0.2) %>%
  layer_dense(units = 16, activation = "relu") %>%
  layer_dropout(rate = 0.2) %>%
  layer_dense(units = 1, activation = "sigmoid")

model %>% compile(
  loss = "binary_crossentropy",   # standard loss for a 0/1 target
  optimizer = "adam"
)
 
history <- model %>% fit(as.matrix(train_encoded), training_labels,
  epochs = 20,
  batch_size = 20, verbose=2,
  validation_data = list(as.matrix(val_encoded), validation_labels)
)
Epoch 1/20
2545/2545 - 4s - loss: 0.5373 - val_loss: 0.4990 - 4s/epoch - 2ms/step
Epoch 2/20
2545/2545 - 3s - loss: 0.5190 - val_loss: 0.5035 - 3s/epoch - 1ms/step
Epoch 3/20
2545/2545 - 3s - loss: 0.5149 - val_loss: 0.4970 - 3s/epoch - 1ms/step
Epoch 4/20
2545/2545 - 3s - loss: 0.5132 - val_loss: 0.4960 - 3s/epoch - 1ms/step
Epoch 5/20
2545/2545 - 3s - loss: 0.5118 - val_loss: 0.4958 - 3s/epoch - 1ms/step
Epoch 6/20
2545/2545 - 3s - loss: 0.5105 - val_loss: 0.4961 - 3s/epoch - 1ms/step
Epoch 7/20
2545/2545 - 3s - loss: 0.5104 - val_loss: 0.4964 - 3s/epoch - 1ms/step
Epoch 8/20
2545/2545 - 3s - loss: 0.5100 - val_loss: 0.4963 - 3s/epoch - 1ms/step
Epoch 9/20
2545/2545 - 3s - loss: 0.5100 - val_loss: 0.4979 - 3s/epoch - 1ms/step
Epoch 10/20
2545/2545 - 3s - loss: 0.5087 - val_loss: 0.4967 - 3s/epoch - 1ms/step
Epoch 11/20
2545/2545 - 3s - loss: 0.5073 - val_loss: 0.4946 - 3s/epoch - 1ms/step
Epoch 12/20
2545/2545 - 3s - loss: 0.5080 - val_loss: 0.4976 - 3s/epoch - 1ms/step
Epoch 13/20
2545/2545 - 3s - loss: 0.5074 - val_loss: 0.4951 - 3s/epoch - 1ms/step
Epoch 14/20
2545/2545 - 3s - loss: 0.5070 - val_loss: 0.4962 - 3s/epoch - 1ms/step
Epoch 15/20
2545/2545 - 3s - loss: 0.5075 - val_loss: 0.4940 - 3s/epoch - 1ms/step
Epoch 16/20
2545/2545 - 3s - loss: 0.5076 - val_loss: 0.4954 - 3s/epoch - 1ms/step
Epoch 17/20
2545/2545 - 3s - loss: 0.5069 - val_loss: 0.4977 - 3s/epoch - 1ms/step
Epoch 18/20
2545/2545 - 3s - loss: 0.5068 - val_loss: 0.4967 - 3s/epoch - 1ms/step
Epoch 19/20
2545/2545 - 3s - loss: 0.5066 - val_loss: 0.5032 - 3s/epoch - 1ms/step
Epoch 20/20
2545/2545 - 3s - loss: 0.5074 - val_loss: 0.4948 - 3s/epoch - 1ms/step
knitr::include_graphics("final_plot1.png")
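The validation loss plateaus around 0.495 after roughly five epochs, so an early-stopping callback would be a natural refinement; a hedged sketch, not what was run above:

history <- model %>% fit(
  as.matrix(train_encoded), training_labels,
  epochs = 50,                    # upper bound; early stopping picks the real count
  batch_size = 20, verbose = 2,
  validation_data = list(as.matrix(val_encoded), validation_labels),
  callbacks = list(
    callback_early_stopping(monitor = "val_loss", patience = 3,
                            restore_best_weights = TRUE)
  )
)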

predictions <- model %>% predict(as.matrix(test_encoded))

442/442 [==============================] - 0s 705us/step
predicted_labels <- as.factor(ifelse(predictions <= 0.5, 0, 1)[,1])
confusion_matrix_class0 <- confusionMatrix(predicted_labels, as.factor(test_labels), mode='everything')
print(confusion_matrix_class0)
Confusion Matrix and Statistics

          Reference
Prediction    0    1
         0 5089 1499
         1 1980 5570
                                         
               Accuracy : 0.7539         
                 95% CI : (0.7467, 0.761)
    No Information Rate : 0.5            
    P-Value [Acc > NIR] : < 2.2e-16      
                                         
                  Kappa : 0.5079         
                                         
 Mcnemar's Test P-Value : 4.021e-16      
                                         
            Sensitivity : 0.7199         
            Specificity : 0.7879         
         Pos Pred Value : 0.7725         
         Neg Pred Value : 0.7377         
              Precision : 0.7725         
                 Recall : 0.7199         
                     F1 : 0.7453         
             Prevalence : 0.5000         
         Detection Rate : 0.3600         
   Detection Prevalence : 0.4660         
      Balanced Accuracy : 0.7539         
                                         
       'Positive' Class : 0              
                                         
confusion_matrix_class1 <- confusionMatrix(predicted_labels, as.factor(test_labels), mode='everything', positive='1')
print(confusion_matrix_class1)
Confusion Matrix and Statistics

          Reference
Prediction    0    1
         0 5089 1499
         1 1980 5570
                                         
               Accuracy : 0.7539         
                 95% CI : (0.7467, 0.761)
    No Information Rate : 0.5            
    P-Value [Acc > NIR] : < 2.2e-16      
                                         
                  Kappa : 0.5079         
                                         
 Mcnemar's Test P-Value : 4.021e-16      
                                         
            Sensitivity : 0.7879         
            Specificity : 0.7199         
         Pos Pred Value : 0.7377         
         Neg Pred Value : 0.7725         
              Precision : 0.7377         
                 Recall : 0.7879         
                     F1 : 0.7620         
             Prevalence : 0.5000         
         Detection Rate : 0.3940         
   Detection Prevalence : 0.5340         
      Balanced Accuracy : 0.7539         
                                         
       'Positive' Class : 1              
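The 0.5 cutoff is reasonable for a balanced test set, but a threshold-free metric is still worth checking; a sketch using the pROC package (an assumption, as it is not loaded elsewhere in this document):

library(pROC)
nn_roc <- roc(response = test_labels, predictor = predictions[, 1])
auc(nn_roc)   # area under the ROC curve for the network's raw scores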
                                         
library(keras)
library(tfruns)

runs <- tuning_run("tuning_script_5.R",
                   flags = list(
                     learning_rate = c(0.1, 0.5, 0.01, 0.001),
                     units1 = c(8, 16, 32, 64, 128, 512),
                     units2 = c(8, 16, 32, 64, 128),
                     units3 = c(8, 16, 32, 64, 128),   # was duplicated as units2; the script's FLAGS define units3
                     batch_size = c(8, 16, 32, 64),
                     dropout = c(0.1, 0.2, 0.3, 0.4, 0.5)
                   ),
                   sample = 0.001
)
12,000 total combinations of flags 
(sampled to 12 combinations)
y
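The echoed source of tuning_script_5.R is truncated in the run logs below. A minimal reconstruction consistent with the echo might look like the following sketch (layer order, dropout placement, and the fit arguments past batch_size are assumptions). Note one apparent bug visible in the echo: the fit call trains `model` (the network defined earlier) rather than `model2`, so as written every run would ignore the flags; the sketch fits `model2` instead.

# tuning_script_5.R -- reconstruction from the truncated echo; details are assumptions
library(keras)
library(tfruns)

FLAGS <- flags(
  flag_numeric("learning_rate", 0.01),
  flag_numeric("units1", 32),
  flag_numeric("units2", 16),
  flag_numeric("units3", 8),
  flag_numeric("batch_size", 32),
  flag_numeric("dropout", 0.2)
)

model2 <- keras_model_sequential() %>%
  layer_dense(units = FLAGS$units1, activation = "relu",
              input_shape = dim(train_encoded)[2]) %>%
  layer_dropout(rate = FLAGS$dropout) %>%
  layer_dense(units = FLAGS$units2, activation = "relu") %>%
  layer_dropout(rate = FLAGS$dropout) %>%
  layer_dense(units = FLAGS$units3, activation = "relu") %>%
  layer_dense(units = 1, activation = "sigmoid")

opt <- optimizer_adam(learning_rate = FLAGS$learning_rate)

model2 %>% compile(loss = "binary_crossentropy", optimizer = opt)

history <- model2 %>% fit(              # the echo shows `model` here, likely a bug
  as.matrix(train_encoded), training_labels,
  epochs = 20,
  batch_size = FLAGS$batch_size,
  verbose = 2,
  validation_data = list(as.matrix(val_encoded), validation_labels)
)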
Training run 1/12 (flags = list(0.01, 64, 8, 16, 16, 0.1)) 
Using run directory runs/2023-05-10T03-43-50Z

> FLAGS= flags(
+   flag_numeric("learning_rate", 0.01),
+   flag_numeric("units1", 32),
+   flag_numeric('units2', 16),
+   flag_numeric('units3', 8) .... [TRUNCATED] 

> model2 <- keras_model_sequential() %>%
+   layer_dense(units = FLAGS$units1, activation = "relu",
+               input_shape = dim(train_encoded[]) .... [TRUNCATED] 

> opt= optimizer_adam(learning_rate= FLAGS$learning_rate)

> model2 %>% compile(
+   loss = "binary_crossentropy",
+   optimizer = opt )

> history <- model %>% fit(as.matrix(train_encoded), training_labels,
+                          epochs = 20,
+                          batch_size =  .... [TRUNCATED] 
Epoch 1/20
WARNING:tensorflow:Callback method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0008s vs `on_train_batch_end` time: 0.0036s). Check your callbacks.
2545/2545 - 3s - loss: 0.5070 - val_loss: 0.4961 - 3s/epoch - 1ms/step
Epoch 2/20
2545/2545 - 3s - loss: 0.5062 - val_loss: 0.4960 - 3s/epoch - 1ms/step
Epoch 3/20
2545/2545 - 3s - loss: 0.5056 - val_loss: 0.4959 - 3s/epoch - 1ms/step
Epoch 4/20
2545/2545 - 2s - loss: 0.5049 - val_loss: 0.4991 - 2s/epoch - 972us/step
Epoch 5/20
2545/2545 - 3s - loss: 0.5055 - val_loss: 0.4972 - 3s/epoch - 1ms/step
Epoch 6/20
2545/2545 - 3s - loss: 0.5049 - val_loss: 0.4966 - 3s/epoch - 1ms/step
Epoch 7/20
2545/2545 - 3s - loss: 0.5053 - val_loss: 0.4962 - 3s/epoch - 1ms/step
Epoch 8/20
2545/2545 - 3s - loss: 0.5039 - val_loss: 0.4964 - 3s/epoch - 1ms/step
Epoch 9/20
2545/2545 - 3s - loss: 0.5053 - val_loss: 0.4994 - 3s/epoch - 1ms/step
Epoch 10/20
2545/2545 - 3s - loss: 0.5054 - val_loss: 0.4978 - 3s/epoch - 1ms/step
Epoch 11/20
2545/2545 - 3s - loss: 0.5044 - val_loss: 0.4972 - 3s/epoch - 1ms/step
Epoch 12/20
2545/2545 - 3s - loss: 0.5049 - val_loss: 0.4971 - 3s/epoch - 1ms/step
Epoch 13/20
2545/2545 - 3s - loss: 0.5045 - val_loss: 0.4980 - 3s/epoch - 1ms/step
Epoch 14/20
2545/2545 - 3s - loss: 0.5036 - val_loss: 0.4997 - 3s/epoch - 1ms/step
Epoch 15/20
2545/2545 - 3s - loss: 0.5044 - val_loss: 0.4978 - 3s/epoch - 1ms/step
Epoch 16/20
2545/2545 - 3s - loss: 0.5049 - val_loss: 0.5000 - 3s/epoch - 1ms/step
Epoch 17/20
2545/2545 - 3s - loss: 0.5043 - val_loss: 0.5012 - 3s/epoch - 1ms/step
Epoch 18/20
2545/2545 - 3s - loss: 0.5039 - val_loss: 0.4979 - 3s/epoch - 1ms/step
Epoch 19/20
2545/2545 - 3s - loss: 0.5043 - val_loss: 0.4973 - 3s/epoch - 1ms/step
Epoch 20/20
2545/2545 - 3s - loss: 0.5045 - val_loss: 0.4995 - 3s/epoch - 1ms/step

Run completed: runs/2023-05-10T03-43-50Z

(Runs 2/12 through 8/12 repeat the same truncated script echo and 20-epoch training log; only their headers and run directories are kept.)

Training run 2/12 (flags = list(0.1, 128, 16, 128, 8, 0.5)) 
Run completed: runs/2023-05-10T03-44-47Z

Training run 3/12 (flags = list(0.01, 32, 16, 32, 16, 0.3)) 
Run completed: runs/2023-05-10T03-45-44Z

Training run 4/12 (flags = list(0.01, 16, 8, 32, 8, 0.4)) 
Run completed: runs/2023-05-10T03-46-43Z

Training run 5/12 (flags = list(0.001, 512, 64, 128, 32, 0.3)) 
Run completed: runs/2023-05-10T03-47-40Z

Training run 6/12 (flags = list(0.001, 8, 128, 64, 8, 0.1)) 
Run completed: runs/2023-05-10T03-48-37Z

Training run 7/12 (flags = list(0.01, 32, 16, 8, 32, 0.5)) 
Run completed: runs/2023-05-10T03-49-35Z

Training run 8/12 (flags = list(0.01, 512, 64, 16, 64, 0.2)) 
Run completed: runs/2023-05-10T03-50-32Z

Training run 9/12 (flags = list(0.1, 128, 128, 64, 8, 0.3)) 
Using run directory runs/2023-05-10T03-51-29Z

> FLAGS= flags(
+   flag_numeric("learning_rate", 0.01),
+   flag_numeric("units1", 32),
+   flag_numeric('units2', 16),
+   flag_numeric('units3', 8) .... [TRUNCATED] 

> model2 <- keras_model_sequential() %>%
+   layer_dense(units = FLAGS$units1, activation = "relu",
+               input_shape = dim(train_encoded[]) .... [TRUNCATED] 

> opt= optimizer_adam(learning_rate= FLAGS$learning_rate)

> model2 %>% compile(
+   loss = "binary_crossentropy",
+   optimizer = opt )

> history <- model %>% fit(as.matrix(train_encoded), training_labels,
+                          epochs = 20,
+                          batch_size =  .... [TRUNCATED] 
Epoch 1/20
WARNING:tensorflow:Callback method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0009s vs `on_train_batch_end` time: 0.0030s). Check your callbacks.
2545/2545 - 3s - loss: 0.5004 - val_loss: 0.5029 - 3s/epoch - 1ms/step
Epoch 2/20
2545/2545 - 3s - loss: 0.4993 - val_loss: 0.5030 - 3s/epoch - 1ms/step
Epoch 3/20
2545/2545 - 3s - loss: 0.4999 - val_loss: 0.5039 - 3s/epoch - 1ms/step
Epoch 4/20
2545/2545 - 3s - loss: 0.5008 - val_loss: 0.5004 - 3s/epoch - 1ms/step
Epoch 5/20
2545/2545 - 3s - loss: 0.5007 - val_loss: 0.5029 - 3s/epoch - 1ms/step
Epoch 6/20
2545/2545 - 3s - loss: 0.5002 - val_loss: 0.5009 - 3s/epoch - 1ms/step
Epoch 7/20
2545/2545 - 3s - loss: 0.4991 - val_loss: 0.5011 - 3s/epoch - 1ms/step
Epoch 8/20
2545/2545 - 3s - loss: 0.4995 - val_loss: 0.5032 - 3s/epoch - 1ms/step
Epoch 9/20
2545/2545 - 3s - loss: 0.5002 - val_loss: 0.4995 - 3s/epoch - 1ms/step
Epoch 10/20
2545/2545 - 3s - loss: 0.5007 - val_loss: 0.4999 - 3s/epoch - 1ms/step
Epoch 11/20
2545/2545 - 3s - loss: 0.5003 - val_loss: 0.5006 - 3s/epoch - 1ms/step
Epoch 12/20
2545/2545 - 3s - loss: 0.5002 - val_loss: 0.5024 - 3s/epoch - 1ms/step
Epoch 13/20
2545/2545 - 3s - loss: 0.4998 - val_loss: 0.5016 - 3s/epoch - 1ms/step
Epoch 14/20
2545/2545 - 3s - loss: 0.4991 - val_loss: 0.5016 - 3s/epoch - 1ms/step
Epoch 15/20
2545/2545 - 3s - loss: 0.5000 - val_loss: 0.5007 - 3s/epoch - 1ms/step
Epoch 16/20
2545/2545 - 3s - loss: 0.5007 - val_loss: 0.5002 - 3s/epoch - 1ms/step
Epoch 17/20
2545/2545 - 3s - loss: 0.5007 - val_loss: 0.4998 - 3s/epoch - 1ms/step
Epoch 18/20
2545/2545 - 3s - loss: 0.4996 - val_loss: 0.5031 - 3s/epoch - 1ms/step
Epoch 19/20
2545/2545 - 3s - loss: 0.5003 - val_loss: 0.5001 - 3s/epoch - 1ms/step
Epoch 20/20
2545/2545 - 3s - loss: 0.5000 - val_loss: 0.5020 - 3s/epoch - 1ms/step

Run completed: runs/2023-05-10T03-51-29Z

Training run 10/12 (flags = list(0.1, 128, 128, 8, 64, 0.5)) 
Using run directory runs/2023-05-10T03-52-26Z

> FLAGS= flags(
+   flag_numeric("learning_rate", 0.01),
+   flag_numeric("units1", 32),
+   flag_numeric('units2', 16),
+   flag_numeric('units3', 8) .... [TRUNCATED] 

> model2 <- keras_model_sequential() %>%
+   layer_dense(units = FLAGS$units1, activation = "relu",
+               input_shape = dim(train_encoded[]) .... [TRUNCATED] 

> opt= optimizer_adam(learning_rate= FLAGS$learning_rate)

> model2 %>% compile(
+   loss = "binary_crossentropy",
+   optimizer = opt )

> history <- model %>% fit(as.matrix(train_encoded), training_labels,
+                          epochs = 20,
+                          batch_size =  .... [TRUNCATED] 
Epoch 1/20
WARNING:tensorflow:Callback method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0008s vs `on_train_batch_end` time: 0.0028s). Check your callbacks.
2545/2545 - 3s - loss: 0.4997 - val_loss: 0.5005 - 3s/epoch - 1ms/step
Epoch 2/20
2545/2545 - 3s - loss: 0.4994 - val_loss: 0.5029 - 3s/epoch - 1ms/step
Epoch 3/20
2545/2545 - 3s - loss: 0.5000 - val_loss: 0.5009 - 3s/epoch - 1ms/step
Epoch 4/20
2545/2545 - 3s - loss: 0.4992 - val_loss: 0.5003 - 3s/epoch - 1ms/step
Epoch 5/20
2545/2545 - 3s - loss: 0.4986 - val_loss: 0.5025 - 3s/epoch - 1ms/step
Epoch 6/20
2545/2545 - 3s - loss: 0.4997 - val_loss: 0.5015 - 3s/epoch - 1ms/step
Epoch 7/20
2545/2545 - 3s - loss: 0.4984 - val_loss: 0.5007 - 3s/epoch - 1ms/step
Epoch 8/20
2545/2545 - 3s - loss: 0.4996 - val_loss: 0.4996 - 3s/epoch - 1ms/step
Epoch 9/20
2545/2545 - 3s - loss: 0.4999 - val_loss: 0.5049 - 3s/epoch - 1ms/step
Epoch 10/20
2545/2545 - 3s - loss: 0.4999 - val_loss: 0.5003 - 3s/epoch - 1ms/step
Epoch 11/20
2545/2545 - 3s - loss: 0.4990 - val_loss: 0.5016 - 3s/epoch - 1ms/step
Epoch 12/20
2545/2545 - 3s - loss: 0.5001 - val_loss: 0.4995 - 3s/epoch - 1ms/step
Epoch 13/20
2545/2545 - 3s - loss: 0.4995 - val_loss: 0.5010 - 3s/epoch - 1ms/step
Epoch 14/20
2545/2545 - 3s - loss: 0.4989 - val_loss: 0.5007 - 3s/epoch - 1ms/step
Epoch 15/20
2545/2545 - 3s - loss: 0.5001 - val_loss: 0.5029 - 3s/epoch - 1ms/step
Epoch 16/20
2545/2545 - 3s - loss: 0.4997 - val_loss: 0.5013 - 3s/epoch - 1ms/step
Epoch 17/20
2545/2545 - 3s - loss: 0.4995 - val_loss: 0.5040 - 3s/epoch - 1ms/step
Epoch 18/20
2545/2545 - 3s - loss: 0.4980 - val_loss: 0.4997 - 3s/epoch - 1ms/step
Epoch 19/20
2545/2545 - 3s - loss: 0.4989 - val_loss: 0.5000 - 3s/epoch - 1ms/step
Epoch 20/20
2545/2545 - 3s - loss: 0.4988 - val_loss: 0.5021 - 3s/epoch - 1ms/step

Run completed: runs/2023-05-10T03-52-26Z

Training run 11/12 (flags = list(0.1, 512, 8, 128, 16, 0.3)) 
Using run directory runs/2023-05-10T03-53-23Z

> FLAGS= flags(
+   flag_numeric("learning_rate", 0.01),
+   flag_numeric("units1", 32),
+   flag_numeric('units2', 16),
+   flag_numeric('units3', 8) .... [TRUNCATED] 

> model2 <- keras_model_sequential() %>%
+   layer_dense(units = FLAGS$units1, activation = "relu",
+               input_shape = dim(train_encoded[]) .... [TRUNCATED] 

> opt= optimizer_adam(learning_rate= FLAGS$learning_rate)

> model2 %>% compile(
+   loss = "binary_crossentropy",
+   optimizer = opt )

> history <- model %>% fit(as.matrix(train_encoded), training_labels,
+                          epochs = 20,
+                          batch_size =  .... [TRUNCATED] 
Epoch 1/20
WARNING:tensorflow:Callback method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0008s vs `on_train_batch_end` time: 0.0029s). Check your callbacks.
2545/2545 - 3s - loss: 0.5003 - val_loss: 0.5008 - 3s/epoch - 1ms/step
Epoch 2/20
2545/2545 - 3s - loss: 0.4994 - val_loss: 0.5009 - 3s/epoch - 1ms/step
Epoch 3/20
2545/2545 - 3s - loss: 0.4998 - val_loss: 0.5017 - 3s/epoch - 1ms/step
Epoch 4/20
2545/2545 - 3s - loss: 0.4992 - val_loss: 0.5030 - 3s/epoch - 1ms/step
Epoch 5/20
2545/2545 - 3s - loss: 0.4997 - val_loss: 0.5007 - 3s/epoch - 1ms/step
Epoch 6/20
2545/2545 - 3s - loss: 0.4996 - val_loss: 0.5029 - 3s/epoch - 1ms/step
Epoch 7/20
2545/2545 - 3s - loss: 0.5002 - val_loss: 0.5008 - 3s/epoch - 1ms/step
Epoch 8/20
2545/2545 - 3s - loss: 0.5001 - val_loss: 0.5017 - 3s/epoch - 1ms/step
Epoch 9/20
2545/2545 - 3s - loss: 0.4994 - val_loss: 0.4997 - 3s/epoch - 1ms/step
Epoch 10/20
2545/2545 - 3s - loss: 0.4997 - val_loss: 0.5008 - 3s/epoch - 1ms/step
Epoch 11/20
2545/2545 - 3s - loss: 0.5003 - val_loss: 0.4995 - 3s/epoch - 1ms/step
Epoch 12/20
2545/2545 - 3s - loss: 0.5000 - val_loss: 0.5027 - 3s/epoch - 1ms/step
Epoch 13/20
2545/2545 - 3s - loss: 0.4995 - val_loss: 0.5015 - 3s/epoch - 1ms/step
Epoch 14/20
2545/2545 - 3s - loss: 0.4996 - val_loss: 0.5010 - 3s/epoch - 1ms/step
Epoch 15/20
2545/2545 - 3s - loss: 0.5000 - val_loss: 0.5011 - 3s/epoch - 1ms/step
Epoch 16/20
2545/2545 - 3s - loss: 0.4985 - val_loss: 0.5027 - 3s/epoch - 1ms/step
Epoch 17/20
2545/2545 - 3s - loss: 0.4995 - val_loss: 0.5009 - 3s/epoch - 1ms/step
Epoch 18/20
2545/2545 - 3s - loss: 0.4990 - val_loss: 0.5020 - 3s/epoch - 1ms/step
Epoch 19/20
2545/2545 - 3s - loss: 0.5000 - val_loss: 0.5012 - 3s/epoch - 1ms/step
Epoch 20/20
2545/2545 - 3s - loss: 0.4985 - val_loss: 0.5018 - 3s/epoch - 1ms/step

Run completed: runs/2023-05-10T03-53-23Z

Training run 12/12 (flags = list(0.001, 64, 32, 128, 64, 0.2)) 
Using run directory runs/2023-05-10T03-54-21Z

> FLAGS= flags(
+   flag_numeric("learning_rate", 0.01),
+   flag_numeric("units1", 32),
+   flag_numeric('units2', 16),
+   flag_numeric('units3', 8) .... [TRUNCATED] 

> model2 <- keras_model_sequential() %>%
+   layer_dense(units = FLAGS$units1, activation = "relu",
+               input_shape = dim(train_encoded[]) .... [TRUNCATED] 

> opt= optimizer_adam(learning_rate= FLAGS$learning_rate)

> model2 %>% compile(
+   loss = "binary_crossentropy",
+   optimizer = opt )

> history <- model %>% fit(as.matrix(train_encoded), training_labels,
+                          epochs = 20,
+                          batch_size =  .... [TRUNCATED] 
Epoch 1/20
WARNING:tensorflow:Callback method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0009s vs `on_train_batch_end` time: 0.0024s). Check your callbacks.
2545/2545 - 3s - loss: 0.4996 - val_loss: 0.5005 - 3s/epoch - 1ms/step
Epoch 2/20
2545/2545 - 3s - loss: 0.4994 - val_loss: 0.5020 - 3s/epoch - 1ms/step
Epoch 3/20
2545/2545 - 3s - loss: 0.4990 - val_loss: 0.5009 - 3s/epoch - 1ms/step
Epoch 4/20
2545/2545 - 4s - loss: 0.4996 - val_loss: 0.5005 - 4s/epoch - 1ms/step
Epoch 5/20
2545/2545 - 3s - loss: 0.4994 - val_loss: 0.5019 - 3s/epoch - 1ms/step
Epoch 6/20
2545/2545 - 3s - loss: 0.4994 - val_loss: 0.5006 - 3s/epoch - 1ms/step
Epoch 7/20
2545/2545 - 3s - loss: 0.4991 - val_loss: 0.5014 - 3s/epoch - 1ms/step
Epoch 8/20
2545/2545 - 4s - loss: 0.5001 - val_loss: 0.5016 - 4s/epoch - 2ms/step
Epoch 9/20
2545/2545 - 4s - loss: 0.4989 - val_loss: 0.5036 - 4s/epoch - 2ms/step
Epoch 10/20
2545/2545 - 4s - loss: 0.5002 - val_loss: 0.5013 - 4s/epoch - 2ms/step
Epoch 11/20
2545/2545 - 4s - loss: 0.4996 - val_loss: 0.5027 - 4s/epoch - 2ms/step
Epoch 12/20
2545/2545 - 4s - loss: 0.4992 - val_loss: 0.5016 - 4s/epoch - 2ms/step
Epoch 13/20
2545/2545 - 4s - loss: 0.4988 - val_loss: 0.5056 - 4s/epoch - 2ms/step
Epoch 14/20
2545/2545 - 4s - loss: 0.4993 - val_loss: 0.5020 - 4s/epoch - 1ms/step
Epoch 15/20
2545/2545 - 4s - loss: 0.5004 - val_loss: 0.5001 - 4s/epoch - 2ms/step
Epoch 16/20
2545/2545 - 3s - loss: 0.4991 - val_loss: 0.5025 - 3s/epoch - 1ms/step
Epoch 17/20
2545/2545 - 4s - loss: 0.4997 - val_loss: 0.5040 - 4s/epoch - 2ms/step
Epoch 18/20
2545/2545 - 3s - loss: 0.4993 - val_loss: 0.5020 - 3s/epoch - 1ms/step
Epoch 19/20
2545/2545 - 3s - loss: 0.4978 - val_loss: 0.5073 - 3s/epoch - 1ms/step
Epoch 20/20
2545/2545 - 3s - loss: 0.4987 - val_loss: 0.5010 - 3s/epoch - 1ms/step

Run completed: runs/2023-05-10T03-54-21Z
view_run(runs$run_dir[1])
knitr::include_graphics("final_plot2.png")

if (runs[1, ]$metric_loss > runs[1, ]$metric_val_loss) {
  print("The model doesn't overfit.")
} else {
  print("The model overfits.")
}
[1] "The model overfits."
library(keras)
library(tfruns)

best_model <- keras_model_sequential() %>%
  layer_dense(units = 512, activation = "relu", input_shape = dim(train_encoded)[2]) %>%
  layer_dropout(rate = 0.2) %>%
  layer_dense(units = 16, activation = "relu") %>%
  layer_dropout(rate = 0.2) %>%
  layer_dense(units = 8, activation = "relu") %>%
  layer_dropout(rate = 0.2) %>%
  layer_dense(units = 1, activation = "sigmoid")

opt= optimizer_adam(learning_rate=0.01)
best_model %>% compile(
  loss = "binary_crossentropy",
  optimizer = opt)

combined_train_x= rbind(train_encoded, val_encoded)
combined_train_y= c(training_labels, validation_labels)

history <- best_model %>% fit(as.matrix(combined_train_x),
                         combined_train_y,
                         batch_size=64,
                         epochs = 20, verbose=2)
Epoch 1/20
884/884 - 3s - loss: 0.5387 - 3s/epoch - 4ms/step
Epoch 2/20
884/884 - 2s - loss: 0.5285 - 2s/epoch - 2ms/step
Epoch 3/20
884/884 - 1s - loss: 0.5266 - 1s/epoch - 2ms/step
Epoch 4/20
884/884 - 1s - loss: 0.5268 - 1s/epoch - 2ms/step
Epoch 5/20
884/884 - 2s - loss: 0.5246 - 2s/epoch - 2ms/step
Epoch 6/20
884/884 - 2s - loss: 0.5264 - 2s/epoch - 2ms/step
Epoch 7/20
884/884 - 1s - loss: 0.5245 - 1s/epoch - 2ms/step
Epoch 8/20
884/884 - 1s - loss: 0.5245 - 1s/epoch - 2ms/step
Epoch 9/20
884/884 - 1s - loss: 0.5271 - 1s/epoch - 2ms/step
Epoch 10/20
884/884 - 2s - loss: 0.5241 - 2s/epoch - 2ms/step
Epoch 11/20
884/884 - 2s - loss: 0.5243 - 2s/epoch - 2ms/step
Epoch 12/20
884/884 - 1s - loss: 0.5239 - 1s/epoch - 2ms/step
Epoch 13/20
884/884 - 2s - loss: 0.5244 - 2s/epoch - 2ms/step
Epoch 14/20
884/884 - 1s - loss: 0.5252 - 1s/epoch - 2ms/step
Epoch 15/20
884/884 - 2s - loss: 0.5236 - 2s/epoch - 2ms/step
Epoch 16/20
884/884 - 2s - loss: 0.5229 - 2s/epoch - 2ms/step
Epoch 17/20
884/884 - 1s - loss: 0.5233 - 1s/epoch - 2ms/step
Epoch 18/20
884/884 - 1s - loss: 0.5236 - 1s/epoch - 2ms/step
Epoch 19/20
884/884 - 2s - loss: 0.5238 - 2s/epoch - 2ms/step
Epoch 20/20
884/884 - 2s - loss: 0.5247 - 2s/epoch - 2ms/step
best_model_predictions <- best_model %>% predict(as.matrix(test_encoded))

442/442 [==============================] - 0s 831us/step
predicted_labels <- as.factor(ifelse(best_model_predictions <= 0.5, 0, 1)[,1])
confusion_matrix_class0 <- confusionMatrix(predicted_labels, as.factor(test_labels), mode='everything')
print(confusion_matrix_class0)
Confusion Matrix and Statistics

          Reference
Prediction    0    1
         0 4910 1336
         1 2159 5733
                                          
               Accuracy : 0.7528          
                 95% CI : (0.7456, 0.7599)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.5056          
                                          
 Mcnemar's Test P-Value : < 2.2e-16       
                                          
            Sensitivity : 0.6946          
            Specificity : 0.8110          
         Pos Pred Value : 0.7861          
         Neg Pred Value : 0.7264          
              Precision : 0.7861          
                 Recall : 0.6946          
                     F1 : 0.7375          
             Prevalence : 0.5000          
         Detection Rate : 0.3473          
   Detection Prevalence : 0.4418          
      Balanced Accuracy : 0.7528          
                                          
       'Positive' Class : 0               
                                          
confusion_matrix_class1 <- confusionMatrix(predicted_labels, as.factor(test_labels), mode='everything', positive='1')
print(confusion_matrix_class1)
Confusion Matrix and Statistics

          Reference
Prediction    0    1
         0 4910 1336
         1 2159 5733
                                          
               Accuracy : 0.7528          
                 95% CI : (0.7456, 0.7599)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.5056          
                                          
 Mcnemar's Test P-Value : < 2.2e-16       
                                          
            Sensitivity : 0.8110          
            Specificity : 0.6946          
         Pos Pred Value : 0.7264          
         Neg Pred Value : 0.7861          
              Precision : 0.7264          
                 Recall : 0.8110          
                     F1 : 0.7664          
             Prevalence : 0.5000          
         Detection Rate : 0.4055          
   Detection Prevalence : 0.5582          
      Balanced Accuracy : 0.7528          
                                          
       'Positive' Class : 1               
                                          
knitr::include_graphics("final_plot3.png")

---
title: "ML Final project"
output: html_notebook
---
# Load diabetes data csv 
```{r}
diabetes_data = read.csv("diabetes_binary.csv")
```

# Check dataframe structure and variable list
```{r}
str(diabetes_data)
```
# Convert categorical variables into factors
```{r}
cols_skip <- c('BMI', 'GenHlth', 'MentHlth', 'PhysHlth', 'Age', 'Education', 'Income')
cols_skip_indices <- which(names(diabetes_data) %in% cols_skip)
diabetes_data[, -cols_skip_indices] <- lapply(diabetes_data[, -cols_skip_indices], factor)
str(diabetes_data)
```

# Check diabetes dataset summary
```{r}
summary(diabetes_data)
```
# Check whether the diabetes data is balanced or imbalanced

```{r}
proportions(table(diabetes_data$Diabetes_binary))
```


```{r}
pie(proportions(table(diabetes_data$Diabetes_binary)), labels = c('Non-Diabetes', 'Diabetes'), col = c('green', 'red'))
```


From the output above, we can see that the diabetes data is balanced: observations are split evenly between the non-diabetic and diabetic classes (50% each).


# Display list of categorical and numerical variables
```{r}
numerical_variables <- character(0)
categorical_variables <- character(0)

for (c in colnames(diabetes_data)) 
{
  if (is.numeric(diabetes_data[,c])){
      numerical_variables <- c(numerical_variables, c)
  } else if (is.factor(diabetes_data[,c])){
    categorical_variables <- c(categorical_variables, c)
  }
}
cat("Categorical variables:", categorical_variables, "\n","\n")
cat("Numerical variables:", numerical_variables, "\n")
```
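
The same type split can be written more compactly with sapply; a minimal, equivalent sketch (it assumes every column is either numeric or a factor, which holds after the conversion above):
```{r}
# TRUE for factor columns, FALSE for numeric ones
var_is_factor <- sapply(diabetes_data, is.factor)
cat("Categorical variables:", names(diabetes_data)[var_is_factor], "\n\n")
cat("Numerical variables:", names(diabetes_data)[!var_is_factor], "\n")
```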
# Check whether there are any missing values in the dataset
```{r}
missing_counts <- colSums(is.na(diabetes_data))

cat("Missing Counts in each columns:", "\n")
print(missing_counts)
```
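
If any counts were non-zero, the affected columns could be listed directly; a one-line sketch (expected to return an empty vector here, since this dataset is complete):
```{r}
# Names of columns containing at least one NA
names(missing_counts)[missing_counts > 0]
```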


# Use statistical tests and plots to find relations between diabetes and the other variables, and remove variables that have a very weak relation with the diabetes outcome
```{r}
diabetes_indices <- which(names(diabetes_data)=='Diabetes_binary')
for (c in colnames(diabetes_data[, -diabetes_indices])) 
{
  if (is.factor(diabetes_data[,c])){
    try({
       pvalue = chisq.test(diabetes_data$Diabetes_binary, diabetes_data[,c])
       cat('pvalue of the chi-square test b/w',c,"and Diabetes is:", pvalue$p.value, '\n')
       mosaicplot(diabetes_data$Diabetes_binary~diabetes_data[,c], shade=TRUE, main=paste("Mosaic Plot of Diabetes vs", c), xlab="Diabetes", ylab=c, las=1) 
    })
  }
  else if (is.numeric(diabetes_data[,c])){
    try({
      pvalue = oneway.test(diabetes_data[,c]~diabetes_data$Diabetes_binary)
      cat('pvalue of the oneway test b/w',c,"and Diabetes is:", pvalue$p.value, '\n')
      boxplot(diabetes_data[,c] ~ diabetes_data$Diabetes_binary, col = '#69b3a2', xlab="Diabetes", ylab=c, main=paste("Box Plot of Diabetes vs", c))
    })
  }
  if (pvalue$p.value > 0.05) {
    diabetes_data[[c]] <- NULL
    cat('\n', 'Removing', c, "from dataset as its p-value is greater than 0.05:", pvalue$p.value, '\n')
  }
}
```
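
For easier reading, the same p-values can also be collected into a single table instead of being scanned off the printed lines; a minimal sketch that re-runs the tests on the variables that remain after the removal step:
```{r}
# One p-value per remaining predictor: chi-square for factors, one-way ANOVA for numerics
predictors <- setdiff(colnames(diabetes_data), "Diabetes_binary")
pvals <- sapply(predictors, function(v) {
  if (is.factor(diabetes_data[[v]])) {
    chisq.test(diabetes_data$Diabetes_binary, diabetes_data[[v]])$p.value
  } else {
    oneway.test(diabetes_data[[v]] ~ diabetes_data$Diabetes_binary)$p.value
  }
})
data.frame(variable = predictors, p_value = unname(pvals))
```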


# After the statistical tests and the removal of unrelated variables, check the dataset again
```{r}
str(diabetes_data)
```
# Partition the dataset into training and test sets using caret's createDataPartition

```{r}
library(caret)

set.seed(1)  # make the split reproducible, consistent with the modeling chunks below
partition_indices = createDataPartition(diabetes_data$Diabetes_binary, p=0.8, list = FALSE)

diabetes_train_data = diabetes_data[partition_indices, ]
diabetes_train_data
diabetes_test_data = diabetes_data[-partition_indices, ]
diabetes_test_data
true_labels = diabetes_test_data$Diabetes_binary
```
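
createDataPartition stratifies on the outcome, so both splits should preserve the original 50/50 class balance; a quick sanity check:
```{r}
# Class proportions in each split should match the full dataset
proportions(table(diabetes_train_data$Diabetes_binary))
proportions(table(diabetes_test_data$Diabetes_binary))
```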

# Train and test a kNN model using 5-fold cross-validation

```{r}
library(caret)

tr <- trainControl(method = "cv", number = 5)
knn_model <- train(Diabetes_binary ~ ., data = diabetes_train_data, method = "knn", trControl = tr)

```

```{r}
knn_predictions <- predict(knn_model, newdata = diabetes_test_data)
```


```{r}
knn_confusion_matrix_class0 <- confusionMatrix(knn_predictions, true_labels, mode='everything')
print(knn_confusion_matrix_class0)
```

```{r}
knn_confusion_matrix_class1 <- confusionMatrix(knn_predictions, true_labels, positive = '1', mode='everything')
print(knn_confusion_matrix_class1)
```


# Lasso model
```{r}
library(glmnet)
library(caret)
set.seed(1)
tr = trainControl(method = "cv", number = 5)
tg = expand.grid(alpha = 1, lambda = 10^seq(-4, -2, length = 100))
# near-zero-variance filtering is requested through train()'s preProcess argument, not trainControl()
lasso_model <- train(Diabetes_binary ~ ., data = diabetes_train_data, method = "glmnet", trControl = tr, tuneGrid = tg, preProcess = "nzv")
lasso_model
```
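
Because the L1 penalty shrinks weak coefficients exactly to zero, it is informative to see which terms survive at the selected lambda; a minimal sketch using the fitted glmnet object:
```{r}
# Coefficients at the cross-validated best lambda; factor predictors appear as dummy-coded terms
coef(lasso_model$finalModel, s = lasso_model$bestTune$lambda)
```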


```{r}
lasso_predictions <- predict(lasso_model, newdata = diabetes_test_data, na.action = na.pass)
```


```{r}
lasso_confusion_matrix_class0 <- confusionMatrix(lasso_predictions, true_labels, mode='everything')
print(lasso_confusion_matrix_class0)
cat("\n\n")
```


```{r}
lasso_confusion_matrix_class1 <- confusionMatrix(lasso_predictions, true_labels, mode='everything', positive='1')
print(lasso_confusion_matrix_class1)
cat("\n\n")
```

# Ridge Model 

```{r}
library(glmnet)
library(caret)

set.seed(1)
tr = trainControl(method = "cv", number = 5)
tg = expand.grid(alpha = 0, lambda = 10^seq(-3, -1, length = 100))
ridge_model <- train(Diabetes_binary ~ ., data = diabetes_train_data, method = "glmnet", trControl = tr, tuneGrid = tg, preProcess = "nzv")
ridge_model
```

```{r}
ridge_predictions <- predict(ridge_model, newdata = diabetes_test_data, na.action = na.pass)
```


```{r}
ridge_confusion_matrix_class0 <- confusionMatrix(ridge_predictions, true_labels, mode='everything')
print(ridge_confusion_matrix_class0)
cat("\n\n")
```


```{r}
ridge_confusion_matrix_class1 <- confusionMatrix(ridge_predictions, true_labels, mode='everything', positive='1')
print(ridge_confusion_matrix_class1)
cat("\n\n")
```


# Elastic Net Model
```{r}
library(glmnet)
library(caret)

set.seed(1)
tr = trainControl(method = "cv", number = 5)
tg = expand.grid(alpha = seq(0, 1, length = 10), lambda = 10^seq(-3, 1, length = 100))
enet_model <- train(Diabetes_binary ~ ., data = diabetes_train_data, method = "glmnet", trControl = tr, tuneGrid = tg, preProcess = "nzv")
enet_model
```

```{r}
enet_predictions <- predict(enet_model, newdata = diabetes_test_data, na.action = na.pass)
```


```{r}
enet_confusion_matrix_class0 <- confusionMatrix(enet_predictions, true_labels, mode='everything')
print(enet_confusion_matrix_class0)
cat("\n\n")
```


```{r}
enet_confusion_matrix_class1 <- confusionMatrix(enet_predictions, true_labels, mode='everything', positive='1')
print(enet_confusion_matrix_class1)
cat("\n\n")
```


# Random Forest Model
```{r}
library(caret)
set.seed(1)
tr = trainControl(method = "cv", number = 5)
rf_model <- train(Diabetes_binary ~ ., data = diabetes_train_data, method = "rf", trControl = tr, importance = TRUE, preProcess = "nzv")
rf_model
```

```{r}
varImp(rf_model)
```
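
The importance scores are easier to compare visually; a one-line sketch using caret's plot method for varImp objects:
```{r}
# Lattice plot of the ten most important predictors
plot(varImp(rf_model), top = 10)
```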

```{r}
rf_predictions <- predict(rf_model, newdata = diabetes_test_data, na.action = na.pass)
```

```{r}
rf_confusion_matrix_class0 <- confusionMatrix(rf_predictions, true_labels, mode='everything')
print(rf_confusion_matrix_class0)
cat("\n\n")
```
```{r}
rf_confusion_matrix_class1 <- confusionMatrix(rf_predictions, true_labels, mode='everything', positive='1')
print(rf_confusion_matrix_class1)
cat("\n\n")
```


# GBM Model

```{r}
library(caret)
set.seed(1)
tr = trainControl(method = "cv", number = 5)
gbm_model <- train(Diabetes_binary ~ ., data = diabetes_train_data, method = "gbm", trControl = tr, preProcess = "nzv")
gbm_model
```

```{r}
gbm_predictions <- predict(gbm_model, newdata = diabetes_test_data, na.action = na.pass)
```

```{r}
gbm_confusion_matrix_class0 <- confusionMatrix(gbm_predictions, true_labels, mode='everything')
print(gbm_confusion_matrix_class0)
cat("\n\n")
```

```{r}
gbm_confusion_matrix_class1 <- confusionMatrix(gbm_predictions, true_labels, mode='everything', positive='1')
print(gbm_confusion_matrix_class1)
cat("\n\n")
```


# SVM Linear Model

```{r}
library(caret)
set.seed(1)
tr = trainControl(method = "cv", number = 5)
svm_linear_model <- train(Diabetes_binary ~ ., data = diabetes_train_data, method = "svmLinear", trControl = tr, preProcess = "nzv")
svm_linear_model
```

```{r}
svm_linear_predictions <- predict(svm_linear_model, newdata = diabetes_test_data, na.action = na.pass)
```

```{r}
svm_linear_confusion_matrix_class0 <- confusionMatrix(svm_linear_predictions, true_labels, mode='everything')
print(svm_linear_confusion_matrix_class0)
cat("\n\n")
```

```{r}
svm_linear_confusion_matrix_class1 <- confusionMatrix(svm_linear_predictions, true_labels, mode='everything', positive='1')
print(svm_linear_confusion_matrix_class1)
cat("\n\n")
```


# SVM Radial Model

```{r}
library(caret)
set.seed(1)
tr = trainControl(method = "cv", number = 5)
svm_radial_model <- train(Diabetes_binary ~ ., data = diabetes_train_data, method = "svmRadial", trControl = tr, preProcess = "nzv")
svm_radial_model
```

```{r}
svm_radial_predictions <- predict(svm_radial_model, newdata = diabetes_test_data, na.action = na.pass)
```

```{r}
svm_radial_confusion_matrix_class0 <- confusionMatrix(svm_radial_predictions, true_labels, mode='everything')
print(svm_radial_confusion_matrix_class0)
cat("\n\n")
```

```{r}
svm_radial_confusion_matrix_class1 <- confusionMatrix(svm_radial_predictions, true_labels, mode='everything', positive='1')
print(svm_radial_confusion_matrix_class1)
cat("\n\n")
```


# Compare all the models using their cross-validation resamples
```{r}
compare=resamples(list(KNN= knn_model, Lasso=lasso_model, Ridge=ridge_model, Enet=enet_model, RF=rf_model, GBM=gbm_model, SVML=svm_linear_model, SVMR=svm_radial_model))
summary(compare)
```
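
caret also provides lattice plots for resamples objects (lattice is attached with caret), which show the spread of accuracy across folds more clearly than the printed summary; a minimal sketch:
```{r}
# Box-and-whisker plot of cross-validated accuracy for each model
bwplot(compare, metric = "Accuracy")
```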

# Preprocessing for the neural network
```{r}
library(caret)

set.seed(1)  # make the train/validation split reproducible
parti_indices = createDataPartition(diabetes_train_data$Diabetes_binary, p=0.9, list = FALSE)
diabetes_index <- which(names(diabetes_train_data)=='Diabetes_binary')

diabetes_train_data1 = diabetes_train_data[parti_indices, -diabetes_index]
diabetes_train_data1
training_labels = diabetes_train_data[parti_indices, diabetes_index]
training_labels <- as.numeric(training_labels) - 1
training_labels
diabetes_validation_data = diabetes_train_data[-parti_indices, -diabetes_index]
diabetes_validation_data
validation_labels = diabetes_train_data[-parti_indices, diabetes_index]
validation_labels <- as.numeric(validation_labels) - 1
validation_labels
diabetes_testing_data = diabetes_test_data[, -diabetes_index]
diabetes_testing_data
test_labels = as.numeric(diabetes_test_data[,diabetes_index]) - 1
test_labels
```

```{r}
library(caret)

str(diabetes_train_data1)
# preProcess here only centers and scales the numeric columns; despite the *_imputed names below, no imputation is performed (the data has no NAs)
preproc <- preProcess(diabetes_train_data1, method = c("center", "scale"))

train_imputed <- predict(preproc, diabetes_train_data1)

str(train_imputed)
train_imputed
test_imputed <- predict(preproc, diabetes_testing_data)
test_imputed
val_imputed <- predict(preproc, diabetes_validation_data)
val_imputed
```


```{r}
library(data.table)
library(mltools)

train_encoded <- one_hot(as.data.table(train_imputed), dropUnusedLevels = FALSE)
train_encoded
test_encoded <- one_hot(as.data.table(test_imputed), dropUnusedLevels = FALSE)
test_encoded
val_encoded <- one_hot(as.data.table(val_imputed), dropUnusedLevels = FALSE)
val_encoded
```
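
With dropUnusedLevels = FALSE, the three encoded tables should share an identical column layout, which matters once a model trained on one is applied to the others; a quick assertion, as a sketch:
```{r}
# All three encoded sets must have the same columns in the same order
stopifnot(identical(names(train_encoded), names(val_encoded)),
          identical(names(train_encoded), names(test_encoded)))
```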

```{r}
# nearZeroVar with names = TRUE returns column names; with saveMetrics = TRUE it would return a metrics table, not usable indices
nzv_cols <- nearZeroVar(train_encoded, names = TRUE)
keep_cols <- setdiff(names(train_encoded), nzv_cols)

train_encoded_nzv <- train_encoded[, ..keep_cols]
val_encoded_nzv <- val_encoded[, ..keep_cols]
test_encoded_nzv <- test_encoded[, ..keep_cols]
train_encoded_nzv
val_encoded_nzv
test_encoded_nzv
```

```{r}
library(keras)

model <- keras_model_sequential() %>%
  layer_dense(units = 32, activation = "relu", input_shape = dim(train_encoded)[2]) %>%
  layer_dropout(rate = 0.2) %>%
  layer_dense(units = 16, activation = "relu") %>%
  layer_dropout(rate = 0.2) %>%
  layer_dense(units = 1, activation = "sigmoid")

model %>% compile(
  loss = "binary_crossentropy",
  optimizer = "adam"
)
 
history <- model %>% fit(as.matrix(train_encoded), training_labels,
  epochs = 20,
  batch_size = 20, verbose=2,
  validation_data = list(as.matrix(val_encoded), validation_labels)
)

```
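
Beyond the training history, the fitted network can be scored directly on the held-out test set; a minimal sketch using keras's evaluate:
```{r}
# Binary cross-entropy loss of the baseline network on the test set
model %>% evaluate(as.matrix(test_encoded), test_labels, verbose = 0)
```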

```{r}
knitr::include_graphics("final_plot1.png")
```

```{r}
predictions <- model %>% predict(as.matrix(test_encoded))
```


```{r}
predicted_labels <- as.factor(ifelse(predictions <= 0.5, 0, 1)[,1])
confusion_matrix_class0 <- confusionMatrix(predicted_labels, as.factor(test_labels), mode='everything')
print(confusion_matrix_class0)
```


```{r}
confusion_matrix_class1 <- confusionMatrix(predicted_labels, as.factor(test_labels), mode='everything', positive='1')
print(confusion_matrix_class1)
```



```{r}
library(keras)
library(tfruns)

runs= tuning_run("tuning_script_5.R",
                 flags=list(
                 learning_rate=c(0.1, 0.5, 0.01, 0.001),
                 units1=c(8, 16, 32, 64, 128, 512),
                 units2=c(8, 16, 32, 64, 128),
                 units3=c(8, 16, 32, 64, 128),
                 batch_size=c(8,16, 32, 64),
                 dropout=c(0.1, 0.2, 0.3, 0.4, 0.5)
                 ),
                 sample= 0.001
)
```
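
Rather than scanning the printed logs, the completed runs can be ranked by validation loss; a minimal sketch using tfruns' ls_runs (this assumes the default runs directory and tfruns' metric_-prefixed column names):
```{r}
# Rank all completed runs, best (lowest) validation loss first
best_runs <- ls_runs(order = metric_val_loss, decreasing = FALSE)
head(best_runs)
```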

```{r}
view_run(runs$run_dir[1])
```
```{r}
knitr::include_graphics("final_plot2.png")
```

```{r}
if (runs[1, ]$metric_loss > runs[1, ]$metric_val_loss) {
  print("The model doesn't overfit.")
} else {
  print("The model overfits.")
}

```


```{r}
library(keras)
library(tfruns)

best_model <- keras_model_sequential() %>%
  layer_dense(units = 512, activation = "relu", input_shape = dim(train_encoded)[2]) %>%
  layer_dropout(rate = 0.2) %>%
  layer_dense(units = 16, activation = "relu") %>%
  layer_dropout(rate = 0.2) %>%
  layer_dense(units = 8, activation = "relu") %>%
  layer_dropout(rate = 0.2) %>%
  layer_dense(units = 1, activation = "sigmoid")

opt= optimizer_adam(learning_rate=0.01)
best_model %>% compile(
  loss = "binary_crossentropy",
  optimizer = opt)

combined_train_x= rbind(train_encoded, val_encoded)
combined_train_y= c(training_labels, validation_labels)

history <- best_model %>% fit(as.matrix(combined_train_x),
                         combined_train_y,
                         batch_size=64,
                         epochs = 20, verbose=2)
```

```{r}
best_model_predictions <- best_model %>% predict(as.matrix(test_encoded))
```


```{r}
predicted_labels <- as.factor(ifelse(best_model_predictions <= 0.5, 0, 1)[,1])
confusion_matrix_class0 <- confusionMatrix(predicted_labels, as.factor(test_labels), mode='everything')
print(confusion_matrix_class0)
```
```{r}
confusion_matrix_class1 <- confusionMatrix(predicted_labels, as.factor(test_labels), mode='everything', positive='1')
print(confusion_matrix_class1)
```
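
The confusion matrices above depend on the fixed 0.5 cutoff; a threshold-free summary is the ROC AUC, sketched here with the pROC package (an extra dependency not used elsewhere in this notebook):
```{r}
library(pROC)

# AUC over the raw predicted probabilities, independent of the 0.5 cutoff
roc_obj <- roc(response = test_labels, predictor = best_model_predictions[, 1])
auc(roc_obj)
```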

```{r}
knitr::include_graphics("final_plot3.png")
```





